drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 #include "vega10_enum.h"
40 #include "hdp/hdp_4_0_offset.h"
41
43 #include "soc15_common.h"
44 #include "clearstate_gfx9.h"
45 #include "v9_structs.h"
46
47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
48
49 #include "amdgpu_ras.h"
50
51 #define GFX9_NUM_GFX_RINGS     1
52 #define GFX9_MEC_HPD_SIZE 4096
53 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
54 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
55
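/*
 * Local definitions for the PWR_MISC_CNTL_STATUS register and its
 * RLC CGPG enable / GFXOFF status fields.
 */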
56 #define mmPWR_MISC_CNTL_STATUS                                  0x0183
57 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX                         0
58 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT        0x0
59 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT          0x1
60 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK          0x00000001L
61 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK            0x00000006L
62
63 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
64 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
65 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
66 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
69
70 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
71 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
72 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
73 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
76
77 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
78 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
79 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
80 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
83
84 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
85 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
86 MODULE_FIRMWARE("amdgpu/raven_me.bin");
87 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
88 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
89 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
90
91 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
92 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
93 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
94 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
98
99 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
100 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
101 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
102 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
105 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
106
107 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
108 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
109 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
110
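/* Arcturus-specific TCP channel steering registers, programmed by the golden settings below. */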
111 #define mmTCP_CHAN_STEER_0_ARCT                                                         0x0b03
112 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX                                                        0
113 #define mmTCP_CHAN_STEER_1_ARCT                                                         0x0b04
114 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX                                                        0
115 #define mmTCP_CHAN_STEER_2_ARCT                                                         0x0b09
116 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX                                                        0
117 #define mmTCP_CHAN_STEER_3_ARCT                                                         0x0b0a
118 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX                                                        0
119 #define mmTCP_CHAN_STEER_4_ARCT                                                         0x0b0b
120 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX                                                        0
121 #define mmTCP_CHAN_STEER_5_ARCT                                                         0x0b0c
122 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX                                                        0
123
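/*
 * GFX RAS sub-blocks as indexed by the RAS TA; the *_INDEX_START/_END
 * values bracket the sub-blocks belonging to each hardware block.
 */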
124 enum ta_ras_gfx_subblock {
125         /*CPC*/
126         TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
127         TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
128         TA_RAS_BLOCK__GFX_CPC_UCODE,
129         TA_RAS_BLOCK__GFX_DC_STATE_ME1,
130         TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
131         TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
132         TA_RAS_BLOCK__GFX_DC_STATE_ME2,
133         TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
134         TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
135         TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
136         /* CPF*/
137         TA_RAS_BLOCK__GFX_CPF_INDEX_START,
138         TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
139         TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
140         TA_RAS_BLOCK__GFX_CPF_TAG,
141         TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
142         /* CPG*/
143         TA_RAS_BLOCK__GFX_CPG_INDEX_START,
144         TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
145         TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
146         TA_RAS_BLOCK__GFX_CPG_TAG,
147         TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
148         /* GDS*/
149         TA_RAS_BLOCK__GFX_GDS_INDEX_START,
150         TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
151         TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
152         TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
153         TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
154         TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
155         TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
156         /* SPI*/
157         TA_RAS_BLOCK__GFX_SPI_SR_MEM,
158         /* SQ*/
159         TA_RAS_BLOCK__GFX_SQ_INDEX_START,
160         TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
161         TA_RAS_BLOCK__GFX_SQ_LDS_D,
162         TA_RAS_BLOCK__GFX_SQ_LDS_I,
163         TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
164         TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
165         /* SQC (3 ranges)*/
166         TA_RAS_BLOCK__GFX_SQC_INDEX_START,
167         /* SQC range 0*/
168         TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
169         TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
170                 TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
171         TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
172         TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
173         TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
174         TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
175         TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
176         TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
177         TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
178                 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
179         /* SQC range 1*/
180         TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
181         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
182                 TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
183         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
184         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
185         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
186         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
187         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
188         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
189         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
190         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
191         TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
192                 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
193         /* SQC range 2*/
194         TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
195         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
196                 TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
197         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
198         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
199         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
200         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
201         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
202         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
203         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
204         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
205         TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
206                 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
207         TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
208         /* TA*/
209         TA_RAS_BLOCK__GFX_TA_INDEX_START,
210         TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
211         TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
212         TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
213         TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
214         TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
215         TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
216         /* TCA*/
217         TA_RAS_BLOCK__GFX_TCA_INDEX_START,
218         TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
219         TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
220         TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
221         /* TCC (5 sub-ranges)*/
222         TA_RAS_BLOCK__GFX_TCC_INDEX_START,
223         /* TCC range 0*/
224         TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
225         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
226         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
227         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
228         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
229         TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
230         TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
231         TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
232         TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
233         TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
234         /* TCC range 1*/
235         TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
236         TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
237         TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
238         TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
239                 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
240         /* TCC range 2*/
241         TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
242         TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
243         TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
244         TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
245         TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
246         TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
247         TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
248         TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
249         TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
250         TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
251                 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
252         /* TCC range 3*/
253         TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
254         TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
255         TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
256         TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
257                 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
258         /* TCC range 4*/
259         TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
260         TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
261                 TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
262         TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
263         TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
264                 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
265         TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
266         /* TCI*/
267         TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
268         /* TCP*/
269         TA_RAS_BLOCK__GFX_TCP_INDEX_START,
270         TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
271         TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
272         TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
273         TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
274         TA_RAS_BLOCK__GFX_TCP_DB_RAM,
275         TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
276         TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
277         TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
278         /* TD*/
279         TA_RAS_BLOCK__GFX_TD_INDEX_START,
280         TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
281         TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
282         TA_RAS_BLOCK__GFX_TD_CS_FIFO,
283         TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
284         /* EA (3 sub-ranges)*/
285         TA_RAS_BLOCK__GFX_EA_INDEX_START,
286         /* EA range 0*/
287         TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
288         TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
289         TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
290         TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
291         TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
292         TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
293         TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
294         TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
295         TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
296         TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
297         /* EA range 1*/
298         TA_RAS_BLOCK__GFX_EA_INDEX1_START,
299         TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
300         TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
301         TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
302         TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
303         TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
304         TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
305         TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
306         TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
307         /* EA range 2*/
308         TA_RAS_BLOCK__GFX_EA_INDEX2_START,
309         TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
310         TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
311         TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
312         TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
313         TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
314         TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
315         /* UTC VM L2 bank*/
316         TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
317         /* UTC VM walker*/
318         TA_RAS_BLOCK__UTC_VML2_WALKER,
319         /* UTC ATC L2 2MB cache*/
320         TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
321         /* UTC ATC L2 4KB cache*/
322         TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
323         TA_RAS_BLOCK__GFX_MAX
324 };
325
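/*
 * Per sub-block RAS description: the human-readable name, the matching
 * ta_ras_gfx_subblock index, and bitmasks of the error types supported
 * in hardware and in software.
 */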
326 struct ras_gfx_subblock {
327         unsigned char *name;
328         int ta_subblock;
329         int hw_supported_error_type;
330         int sw_supported_error_type;
331 };
332
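/*
 * Builds one ras_gfx_subblocks[] entry; flags a-d are packed into
 * hw_supported_error_type and e-h into sw_supported_error_type.
 */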
333 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
334         [AMDGPU_RAS_BLOCK__##subblock] = {                                     \
335                 #subblock,                                                     \
336                 TA_RAS_BLOCK__##subblock,                                      \
337                 ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
338                 (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
339         }
340
341 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
342         AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
343         AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
344         AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
345         AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
346         AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
347         AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
348         AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
349         AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
350         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
351         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
352         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
353         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
354         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
355         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
356         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
357         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
358         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
359                              0),
360         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
361                              0),
362         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
363         AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
364         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
365         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
366         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
367         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
368         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
369         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
370                              0, 0),
371         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
372                              0),
373         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
374                              0, 0),
375         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
376                              0),
377         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
378                              0, 0),
379         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
380                              0),
381         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
382                              1),
383         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
384                              0, 0, 0),
385         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
386                              0),
387         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
388                              0),
389         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
390                              0),
391         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
392                              0),
393         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
394                              0),
395         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
396                              0, 0),
397         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
398                              0),
399         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
400                              0),
401         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
402                              0, 0, 0),
403         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
404                              0),
405         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
406                              0),
407         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
408                              0),
409         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
410                              0),
411         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
412                              0),
413         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
414                              0, 0),
415         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
416                              0),
417         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
418         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
419         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
420         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
421         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
422         AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
423         AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
424         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
425         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
426                              1),
427         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
428                              1),
429         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
430                              1),
431         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
432                              0),
433         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
434                              0),
435         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
436         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
437         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
438         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
439         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
440         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
441         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
442         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
443         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
444         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
445         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
446         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
447                              0),
448         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
449         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
450                              0),
451         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
452                              0, 0),
453         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
454                              0),
455         AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
456         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
457         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
458         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
459         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
460         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
461         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
462         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
463         AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
464         AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
465         AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
466         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
467         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
468         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
469         AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
470         AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
471         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
472         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
473         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
474         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
475         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
476         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
477         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
478         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
479         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
480         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
481         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
482         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
483         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
484         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
485         AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
486         AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
487         AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
488         AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
489 };
490
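/*
 * Golden register settings: register/mask/value triples applied on top of
 * the hardware defaults by soc15_program_register_sequence() from
 * gfx_v9_0_init_golden_registers().
 */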
491 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
492 {
493         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
494         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
495         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
496         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
497         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
498         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
499         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
500         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
501         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
502         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
503         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
504         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
505         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
506         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
507         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
508         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
509         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
510         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
511         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
512         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
513 };
514
515 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
516 {
517         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
518         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
519         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
520         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
521         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
522         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
523         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
524         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
525         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
526         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
527         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
528         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
529         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
530         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
531         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
532         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
533         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
534         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
535 };
536
537 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
538 {
539         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
540         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
541         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
542         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
543         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
544         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
545         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
546         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
547         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
548         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
549         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
550 };
551
552 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
553 {
554         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
555         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
556         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
557         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
558         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
559         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
560         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
561         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
562         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
563         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
564         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
565         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
566         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
567         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
568         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
569         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
570         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
571         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
572         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
573         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
574         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
575         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
576         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
577         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
578 };
579
580 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
581 {
582         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
583         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
584         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
585         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
586         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
587         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
588         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
589 };
590
591 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
592 {
593         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
594         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
595         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
596         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
597         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
598         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
599         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
600         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
601         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
602         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
603         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
604         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
605         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
606         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
607         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
608         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
609         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
610         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
611         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
612 };
613
614 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
615 {
616         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
617         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
618         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
619 };
620
621 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
622 {
623         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
624         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
625         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
626         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
627         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
628         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
629         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
630         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
631         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
632         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
633         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
634         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
635         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
636         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
637         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
638         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
639 };
640
641 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
642 {
643         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
644         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
645         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
646         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
647         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
648         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
649         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
650         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
651         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
652         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
653         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
654         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
655         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
656 };
657
658 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
659 {
660         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
661         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
662         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
663         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
664         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
665         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
666         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
667         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
668 };
669
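/* Offsets of the RLC_SRM_INDEX_CNTL_ADDR/DATA register instances relative to instance 0. */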
670 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
671 {
672         mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
673         mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
674         mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
675         mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
676         mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
677         mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
678         mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
679         mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
680 };
681
682 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
683 {
684         mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
685         mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
686         mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
687         mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
688         mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
689         mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
690         mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
691         mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
692 };
693
694 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
695 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
696 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
697 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
698
699 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
700 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
701 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
702 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
703 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
704                                  struct amdgpu_cu_info *cu_info);
705 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
706 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
707 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
708 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
709 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
710                                           void *ras_error_status);
711 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
712                                      void *inject_if);
713
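/*
 * Program the per-ASIC golden register sequences defined above, plus the
 * common GC 9.x settings for everything except Arcturus.
 */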
714 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
715 {
716         switch (adev->asic_type) {
717         case CHIP_VEGA10:
718                 soc15_program_register_sequence(adev,
719                                                 golden_settings_gc_9_0,
720                                                 ARRAY_SIZE(golden_settings_gc_9_0));
721                 soc15_program_register_sequence(adev,
722                                                 golden_settings_gc_9_0_vg10,
723                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
724                 break;
725         case CHIP_VEGA12:
726                 soc15_program_register_sequence(adev,
727                                                 golden_settings_gc_9_2_1,
728                                                 ARRAY_SIZE(golden_settings_gc_9_2_1));
729                 soc15_program_register_sequence(adev,
730                                                 golden_settings_gc_9_2_1_vg12,
731                                                 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
732                 break;
733         case CHIP_VEGA20:
734                 soc15_program_register_sequence(adev,
735                                                 golden_settings_gc_9_0,
736                                                 ARRAY_SIZE(golden_settings_gc_9_0));
737                 soc15_program_register_sequence(adev,
738                                                 golden_settings_gc_9_0_vg20,
739                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg20));
740                 break;
741         case CHIP_ARCTURUS:
742                 soc15_program_register_sequence(adev,
743                                                 golden_settings_gc_9_4_1_arct,
744                                                 ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
745                 break;
746         case CHIP_RAVEN:
747                 soc15_program_register_sequence(adev, golden_settings_gc_9_1,
748                                                 ARRAY_SIZE(golden_settings_gc_9_1));
749                 if (adev->rev_id >= 8)
750                         soc15_program_register_sequence(adev,
751                                                         golden_settings_gc_9_1_rv2,
752                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv2));
753                 else
754                         soc15_program_register_sequence(adev,
755                                                         golden_settings_gc_9_1_rv1,
756                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv1));
757                 break;
758         default:
759                 break;
760         }
761
762         if (adev->asic_type != CHIP_ARCTURUS)
763                 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
764                                                 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
765 }
766
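/* Expose eight scratch registers, starting at mmSCRATCH_REG0, to the scratch allocator. */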
767 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
768 {
769         adev->gfx.scratch.num_reg = 8;
770         adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
771         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
772 }
773
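/*
 * Emit a WRITE_DATA packet that writes @val to register @reg from the
 * selected engine, optionally requesting write confirmation.
 */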
774 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
775                                        bool wc, uint32_t reg, uint32_t val)
776 {
777         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
778         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
779                                 WRITE_DATA_DST_SEL(0) |
780                                 (wc ? WR_CONFIRM : 0));
781         amdgpu_ring_write(ring, reg);
782         amdgpu_ring_write(ring, 0);
783         amdgpu_ring_write(ring, val);
784 }
785
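/*
 * Emit a WAIT_REG_MEM packet that polls a register or memory location
 * until (value & @mask) == @ref, using @inv as the poll interval.
 */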
786 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
787                                   int mem_space, int opt, uint32_t addr0,
788                                   uint32_t addr1, uint32_t ref, uint32_t mask,
789                                   uint32_t inv)
790 {
791         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
792         amdgpu_ring_write(ring,
793                                  /* memory (1) or register (0) */
794                                  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
795                                  WAIT_REG_MEM_OPERATION(opt) | /* wait */
796                                  WAIT_REG_MEM_FUNCTION(3) |  /* equal */
797                                  WAIT_REG_MEM_ENGINE(eng_sel)));
798
799         if (mem_space)
800                 BUG_ON(addr0 & 0x3); /* Dword align */
801         amdgpu_ring_write(ring, addr0);
802         amdgpu_ring_write(ring, addr1);
803         amdgpu_ring_write(ring, ref);
804         amdgpu_ring_write(ring, mask);
805         amdgpu_ring_write(ring, inv); /* poll interval */
806 }
807
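/*
 * Basic ring test: seed a scratch register with 0xCAFEDEAD, submit a
 * SET_UCONFIG_REG packet that writes 0xDEADBEEF, and poll until the new
 * value lands or the timeout expires.
 */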
808 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
809 {
810         struct amdgpu_device *adev = ring->adev;
811         uint32_t scratch;
812         uint32_t tmp = 0;
813         unsigned i;
814         int r;
815
816         r = amdgpu_gfx_scratch_get(adev, &scratch);
817         if (r)
818                 return r;
819
820         WREG32(scratch, 0xCAFEDEAD);
821         r = amdgpu_ring_alloc(ring, 3);
822         if (r)
823                 goto error_free_scratch;
824
825         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
826         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
827         amdgpu_ring_write(ring, 0xDEADBEEF);
828         amdgpu_ring_commit(ring);
829
830         for (i = 0; i < adev->usec_timeout; i++) {
831                 tmp = RREG32(scratch);
832                 if (tmp == 0xDEADBEEF)
833                         break;
834                 udelay(1);
835         }
836
837         if (i >= adev->usec_timeout)
838                 r = -ETIMEDOUT;
839
840 error_free_scratch:
841         amdgpu_gfx_scratch_free(adev, scratch);
842         return r;
843 }
844
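/*
 * IB test: write 0xDEADBEEF to a writeback slot from an indirect buffer
 * and wait on the fence to verify end-to-end command submission.
 */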
845 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
846 {
847         struct amdgpu_device *adev = ring->adev;
848         struct amdgpu_ib ib;
849         struct dma_fence *f = NULL;
850
851         unsigned index;
852         uint64_t gpu_addr;
853         uint32_t tmp;
854         long r;
855
856         r = amdgpu_device_wb_get(adev, &index);
857         if (r)
858                 return r;
859
860         gpu_addr = adev->wb.gpu_addr + (index * 4);
861         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
862         memset(&ib, 0, sizeof(ib));
863         r = amdgpu_ib_get(adev, NULL, 16, &ib);
864         if (r)
865                 goto err1;
866
867         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
868         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
869         ib.ptr[2] = lower_32_bits(gpu_addr);
870         ib.ptr[3] = upper_32_bits(gpu_addr);
871         ib.ptr[4] = 0xDEADBEEF;
872         ib.length_dw = 5;
873
874         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
875         if (r)
876                 goto err2;
877
878         r = dma_fence_wait_timeout(f, false, timeout);
879         if (r == 0) {
880                 r = -ETIMEDOUT;
881                 goto err2;
882         } else if (r < 0) {
883                 goto err2;
884         }
885
886         tmp = adev->wb.wb[index];
887         if (tmp == 0xDEADBEEF)
888                 r = 0;
889         else
890                 r = -EINVAL;
891
892 err2:
893         amdgpu_ib_free(adev, &ib, NULL);
894         dma_fence_put(f);
895 err1:
896         amdgpu_device_wb_free(adev, index);
897         return r;
898 }
899
900
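/* Release all CP/RLC/MEC firmware images and the cached RLC register list format. */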
901 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
902 {
903         release_firmware(adev->gfx.pfp_fw);
904         adev->gfx.pfp_fw = NULL;
905         release_firmware(adev->gfx.me_fw);
906         adev->gfx.me_fw = NULL;
907         release_firmware(adev->gfx.ce_fw);
908         adev->gfx.ce_fw = NULL;
909         release_firmware(adev->gfx.rlc_fw);
910         adev->gfx.rlc_fw = NULL;
911         release_firmware(adev->gfx.mec_fw);
912         adev->gfx.mec_fw = NULL;
913         release_firmware(adev->gfx.mec2_fw);
914         adev->gfx.mec2_fw = NULL;
915
916         kfree(adev->gfx.rlc.register_list_format);
917 }
918
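/*
 * Parse the v2.1 RLC firmware header and cache the save/restore list
 * CNTL, GPM and SRM blobs along with their sizes and versions.
 */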
919 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
920 {
921         const struct rlc_firmware_header_v2_1 *rlc_hdr;
922
923         rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
924         adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
925         adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
926         adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
927         adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
928         adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
929         adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
930         adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
931         adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
932         adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
933         adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
934         adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
935         adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
936         adev->gfx.rlc.reg_list_format_direct_reg_list_length =
937                         le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
938 }
939
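/*
 * Only enable the firmware-assisted register write-wait path when the CP
 * ME/PFP/MEC firmware on this ASIC is new enough to support it.
 */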
940 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
941 {
942         adev->gfx.me_fw_write_wait = false;
943         adev->gfx.mec_fw_write_wait = false;
944
945         switch (adev->asic_type) {
946         case CHIP_VEGA10:
947                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
948                     (adev->gfx.me_feature_version >= 42) &&
949                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
950                     (adev->gfx.pfp_feature_version >= 42))
951                         adev->gfx.me_fw_write_wait = true;
952
953                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
954                     (adev->gfx.mec_feature_version >= 42))
955                         adev->gfx.mec_fw_write_wait = true;
956                 break;
957         case CHIP_VEGA12:
958                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
959                     (adev->gfx.me_feature_version >= 44) &&
960                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
961                     (adev->gfx.pfp_feature_version >= 44))
962                         adev->gfx.me_fw_write_wait = true;
963
964                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
965                     (adev->gfx.mec_feature_version >= 44))
966                         adev->gfx.mec_fw_write_wait = true;
967                 break;
968         case CHIP_VEGA20:
969                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
970                     (adev->gfx.me_feature_version >= 44) &&
971                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
972                     (adev->gfx.pfp_feature_version >= 44))
973                         adev->gfx.me_fw_write_wait = true;
974
975                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
976                     (adev->gfx.mec_feature_version >= 44))
977                         adev->gfx.mec_fw_write_wait = true;
978                 break;
979         case CHIP_RAVEN:
980                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
981                     (adev->gfx.me_feature_version >= 42) &&
982                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
983                     (adev->gfx.pfp_feature_version >= 42))
984                         adev->gfx.me_fw_write_wait = true;
985
986                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
987                     (adev->gfx.mec_feature_version >= 42))
988                         adev->gfx.mec_fw_write_wait = true;
989                 break;
990         default:
991                 break;
992         }
993 }
994
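/*
 * Disable GFXOFF on early Raven parts whose RLC firmware is too old, is a
 * known bad version, or does not use the v2.1 layout.
 */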
995 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
996 {
997         switch (adev->asic_type) {
998         case CHIP_VEGA10:
999         case CHIP_VEGA12:
1000         case CHIP_VEGA20:
1001                 break;
1002         case CHIP_RAVEN:
1003                 if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
1004                         break;
1005                 if ((adev->gfx.rlc_fw_version != 106 &&
1006                      adev->gfx.rlc_fw_version < 531) ||
1007                     (adev->gfx.rlc_fw_version == 53815) ||
1008                     (adev->gfx.rlc_feature_version < 1) ||
1009                     !adev->gfx.rlc.is_rlc_v2_1)
1010                         adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1011                 break;
1012         default:
1013                 break;
1014         }
1015 }
1016
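/*
 * Fetch and validate the PFP, ME and CE firmware images for @chip_name,
 * record their versions, and register them for PSP (front-door) loading
 * when that load type is selected.
 */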
1017 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1018                                           const char *chip_name)
1019 {
1020         char fw_name[30];
1021         int err;
1022         struct amdgpu_firmware_info *info = NULL;
1023         const struct common_firmware_header *header = NULL;
1024         const struct gfx_firmware_header_v1_0 *cp_hdr;
1025
1026         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1027         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1028         if (err)
1029                 goto out;
1030         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1031         if (err)
1032                 goto out;
1033         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1034         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1035         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1036
1037         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1038         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1039         if (err)
1040                 goto out;
1041         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1042         if (err)
1043                 goto out;
1044         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1045         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1046         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1047
1048         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1049         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1050         if (err)
1051                 goto out;
1052         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1053         if (err)
1054                 goto out;
1055         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1056         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1057         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1058
1059         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1060                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1061                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1062                 info->fw = adev->gfx.pfp_fw;
1063                 header = (const struct common_firmware_header *)info->fw->data;
1064                 adev->firmware.fw_size +=
1065                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1066
1067                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1068                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1069                 info->fw = adev->gfx.me_fw;
1070                 header = (const struct common_firmware_header *)info->fw->data;
1071                 adev->firmware.fw_size +=
1072                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1073
1074                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1075                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1076                 info->fw = adev->gfx.ce_fw;
1077                 header = (const struct common_firmware_header *)info->fw->data;
1078                 adev->firmware.fw_size +=
1079                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1080         }
1081
1082 out:
1083         if (err) {
1084                 dev_err(adev->dev,
1085                         "gfx9: Failed to load firmware \"%s\"\n",
1086                         fw_name);
1087                 release_firmware(adev->gfx.pfp_fw);
1088                 adev->gfx.pfp_fw = NULL;
1089                 release_firmware(adev->gfx.me_fw);
1090                 adev->gfx.me_fw = NULL;
1091                 release_firmware(adev->gfx.ce_fw);
1092                 adev->gfx.ce_fw = NULL;
1093         }
1094         return err;
1095 }
1096
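/*
 * Pick the appropriate RLC binary (AM4 Picasso, Raven "kicker", or the
 * default), parse the v2.0 header into the RLC bookkeeping fields and
 * register lists, and register the RLC images (including the v2.1
 * save/restore lists) for PSP loading.
 */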
1097 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1098                                           const char *chip_name)
1099 {
1100         char fw_name[30];
1101         int err;
1102         struct amdgpu_firmware_info *info = NULL;
1103         const struct common_firmware_header *header = NULL;
1104         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1105         unsigned int *tmp = NULL;
1106         unsigned int i = 0;
1107         uint16_t version_major;
1108         uint16_t version_minor;
1109         uint32_t smu_version;
1110
1111         /*
1112          * For Picasso on an AM4 socket board, use picasso_rlc_am4.bin
1113          * instead of picasso_rlc.bin.
1114          * The board type is judged from the PCI revision:
1115          * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1116          *          or revision >= 0xD8 && revision <= 0xDF
1117          * otherwise it is PCO FP5.
1118          */
1119         if (!strcmp(chip_name, "picasso") &&
1120                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1121                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1122                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1123         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1124                 (smu_version >= 0x41e2b))
1125                 /*
1126                  * SMC is loaded by the SBIOS on APUs, so the SMU version can be read directly.
1127                  */
1128                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1129         else
1130                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1131         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1132         if (err)
1133                 goto out;
1134         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
        if (err)
                goto out;
1135         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1136
1137         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1138         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1139         if (version_major == 2 && version_minor == 1)
1140                 adev->gfx.rlc.is_rlc_v2_1 = true;
1141
1142         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1143         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1144         adev->gfx.rlc.save_and_restore_offset =
1145                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1146         adev->gfx.rlc.clear_state_descriptor_offset =
1147                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1148         adev->gfx.rlc.avail_scratch_ram_locations =
1149                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1150         adev->gfx.rlc.reg_restore_list_size =
1151                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1152         adev->gfx.rlc.reg_list_format_start =
1153                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1154         adev->gfx.rlc.reg_list_format_separate_start =
1155                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1156         adev->gfx.rlc.starting_offsets_start =
1157                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1158         adev->gfx.rlc.reg_list_format_size_bytes =
1159                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1160         adev->gfx.rlc.reg_list_size_bytes =
1161                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1162         adev->gfx.rlc.register_list_format =
1163                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1164                                 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1165         if (!adev->gfx.rlc.register_list_format) {
1166                 err = -ENOMEM;
1167                 goto out;
1168         }
1169
1170         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1171                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1172         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1173                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1174
1175         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1176
1177         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1178                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1179         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1180                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1181
1182         if (adev->gfx.rlc.is_rlc_v2_1)
1183                 gfx_v9_0_init_rlc_ext_microcode(adev);
1184
1185         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1186                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1187                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1188                 info->fw = adev->gfx.rlc_fw;
1189                 header = (const struct common_firmware_header *)info->fw->data;
1190                 adev->firmware.fw_size +=
1191                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1192
1193                 if (adev->gfx.rlc.is_rlc_v2_1 &&
1194                     adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1195                     adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1196                     adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1197                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1198                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1199                         info->fw = adev->gfx.rlc_fw;
1200                         adev->firmware.fw_size +=
1201                                 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1202
1203                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1204                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1205                         info->fw = adev->gfx.rlc_fw;
1206                         adev->firmware.fw_size +=
1207                                 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1208
1209                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1210                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1211                         info->fw = adev->gfx.rlc_fw;
1212                         adev->firmware.fw_size +=
1213                                 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1214                 }
1215         }
1216
1217 out:
1218         if (err) {
1219                 dev_err(adev->dev,
1220                         "gfx9: Failed to load firmware \"%s\"\n",
1221                         fw_name);
1222                 release_firmware(adev->gfx.rlc_fw);
1223                 adev->gfx.rlc_fw = NULL;
1224         }
1225         return err;
1226 }
1227
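/*
 * Request and validate the MEC (and, when present, MEC2) microcode,
 * record the versions, and register the MEC/MEC JT images for PSP
 * loading.  The gfxoff and fw_write_wait checks run once all firmware
 * versions are known.
 */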
1228 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1229                                           const char *chip_name)
1230 {
1231         char fw_name[30];
1232         int err;
1233         struct amdgpu_firmware_info *info = NULL;
1234         const struct common_firmware_header *header = NULL;
1235         const struct gfx_firmware_header_v1_0 *cp_hdr;
1236
1237         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1238         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1239         if (err)
1240                 goto out;
1241         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1242         if (err)
1243                 goto out;
1244         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1245         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1246         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1247
1248
1249         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1250         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1251         if (!err) {
1252                 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1253                 if (err)
1254                         goto out;
1255                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1256                                 adev->gfx.mec2_fw->data;
1257                 adev->gfx.mec2_fw_version =
1258                                 le32_to_cpu(cp_hdr->header.ucode_version);
1259                 adev->gfx.mec2_feature_version =
1260                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1261         } else {
1262                 err = 0;
1263                 adev->gfx.mec2_fw = NULL;
1264         }
1265
1266         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1267                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1268                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1269                 info->fw = adev->gfx.mec_fw;
1270                 header = (const struct common_firmware_header *)info->fw->data;
1271                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1272                 adev->firmware.fw_size +=
1273                         ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1274
1275                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1276                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1277                 info->fw = adev->gfx.mec_fw;
1278                 adev->firmware.fw_size +=
1279                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1280
1281                 if (adev->gfx.mec2_fw) {
1282                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1283                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1284                         info->fw = adev->gfx.mec2_fw;
1285                         header = (const struct common_firmware_header *)info->fw->data;
1286                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1287                         adev->firmware.fw_size +=
1288                                 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1289
1290                         /* TODO: Determine if MEC2 JT FW loading can be removed
1291                          * for all GFX v9 ASICs and above. */
1292                         if (adev->asic_type != CHIP_ARCTURUS) {
1293                                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1294                                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1295                                 info->fw = adev->gfx.mec2_fw;
1296                                 adev->firmware.fw_size +=
1297                                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1298                                         PAGE_SIZE);
1299                         }
1300                 }
1301         }
1302
1303 out:
1304         gfx_v9_0_check_if_need_gfxoff(adev);
1305         gfx_v9_0_check_fw_write_wait(adev);
1306         if (err) {
1307                 dev_err(adev->dev,
1308                         "gfx9: Failed to load firmware \"%s\"\n",
1309                         fw_name);
1310                 release_firmware(adev->gfx.mec_fw);
1311                 adev->gfx.mec_fw = NULL;
1312                 release_firmware(adev->gfx.mec2_fw);
1313                 adev->gfx.mec2_fw = NULL;
1314         }
1315         return err;
1316 }
1317
1318 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1319 {
1320         const char *chip_name;
1321         int r;
1322
1323         DRM_DEBUG("\n");
1324
1325         switch (adev->asic_type) {
1326         case CHIP_VEGA10:
1327                 chip_name = "vega10";
1328                 break;
1329         case CHIP_VEGA12:
1330                 chip_name = "vega12";
1331                 break;
1332         case CHIP_VEGA20:
1333                 chip_name = "vega20";
1334                 break;
1335         case CHIP_RAVEN:
1336                 if (adev->rev_id >= 8)
1337                         chip_name = "raven2";
1338                 else if (adev->pdev->device == 0x15d8)
1339                         chip_name = "picasso";
1340                 else
1341                         chip_name = "raven";
1342                 break;
1343         case CHIP_ARCTURUS:
1344                 chip_name = "arcturus";
1345                 break;
1346         default:
1347                 BUG();
1348         }
1349
1350         /* No CPG in Arcturus */
1351         if (adev->asic_type != CHIP_ARCTURUS) {
1352                 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1353                 if (r)
1354                         return r;
1355         }
1356
1357         r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1358         if (r)
1359                 return r;
1360
1361         r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1362         if (r)
1363                 return r;
1364
1365         return r;
1366 }
1367
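/*
 * Return the number of dwords needed for the clear-state buffer:
 * preamble begin/end, context control, the SECT_CONTEXT extents from
 * gfx9_cs_data, and the final clear-state packet.
 */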
1368 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1369 {
1370         u32 count = 0;
1371         const struct cs_section_def *sect = NULL;
1372         const struct cs_extent_def *ext = NULL;
1373
1374         /* begin clear state */
1375         count += 2;
1376         /* context control state */
1377         count += 3;
1378
1379         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1380                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1381                         if (sect->id == SECT_CONTEXT)
1382                                 count += 2 + ext->reg_count;
1383                         else
1384                                 return 0;
1385                 }
1386         }
1387
1388         /* end clear state */
1389         count += 2;
1390         /* clear state */
1391         count += 2;
1392
1393         return count;
1394 }
1395
1396 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1397                                     volatile u32 *buffer)
1398 {
1399         u32 count = 0, i;
1400         const struct cs_section_def *sect = NULL;
1401         const struct cs_extent_def *ext = NULL;
1402
1403         if (adev->gfx.rlc.cs_data == NULL)
1404                 return;
1405         if (buffer == NULL)
1406                 return;
1407
1408         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1409         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1410
1411         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1412         buffer[count++] = cpu_to_le32(0x80000000);
1413         buffer[count++] = cpu_to_le32(0x80000000);
1414
1415         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1416                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1417                         if (sect->id == SECT_CONTEXT) {
1418                                 buffer[count++] =
1419                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1420                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1421                                                 PACKET3_SET_CONTEXT_REG_START);
1422                                 for (i = 0; i < ext->reg_count; i++)
1423                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1424                         } else {
1425                                 return;
1426                         }
1427                 }
1428         }
1429
1430         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1431         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1432
1433         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1434         buffer[count++] = cpu_to_le32(0);
1435 }
1436
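/*
 * Program the RLC always-on/always-active CU masks: the first N CUs of
 * each SE/SH (4 on APUs, 8 on Vega12, 12 otherwise) are kept active for
 * load balancing and power gating.
 */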
1437 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1438 {
1439         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1440         uint32_t pg_always_on_cu_num = 2;
1441         uint32_t always_on_cu_num;
1442         uint32_t i, j, k;
1443         uint32_t mask, cu_bitmap, counter;
1444
1445         if (adev->flags & AMD_IS_APU)
1446                 always_on_cu_num = 4;
1447         else if (adev->asic_type == CHIP_VEGA12)
1448                 always_on_cu_num = 8;
1449         else
1450                 always_on_cu_num = 12;
1451
1452         mutex_lock(&adev->grbm_idx_mutex);
1453         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1454                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1455                         mask = 1;
1456                         cu_bitmap = 0;
1457                         counter = 0;
1458                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1459
1460                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1461                                 if (cu_info->bitmap[i][j] & mask) {
1462                                         if (counter == pg_always_on_cu_num)
1463                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1464                                         if (counter < always_on_cu_num)
1465                                                 cu_bitmap |= mask;
1466                                         else
1467                                                 break;
1468                                         counter++;
1469                                 }
1470                                 mask <<= 1;
1471                         }
1472
1473                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1474                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1475                 }
1476         }
1477         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1478         mutex_unlock(&adev->grbm_idx_mutex);
1479 }
1480
1481 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1482 {
1483         uint32_t data;
1484
1485         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1486         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1487         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1488         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1489         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1490
1491         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1492         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1493
1494         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1495         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1496
1497         mutex_lock(&adev->grbm_idx_mutex);
1498         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1499         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1500         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1501
1502         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1503         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1504         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1505         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1506         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1507
1508         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1509         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1510         data &= 0x0000FFFF;
1511         data |= 0x00C00000;
1512         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1513
1514         /*
1515          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1516          * programmed in gfx_v9_0_init_always_on_cu_mask()
1517          */
1518
1519         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1520          * but is used here for RLC_LB_CNTL configuration */
1521         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1522         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1523         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1524         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1525         mutex_unlock(&adev->grbm_idx_mutex);
1526
1527         gfx_v9_0_init_always_on_cu_mask(adev);
1528 }
1529
1530 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1531 {
1532         uint32_t data;
1533
1534         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1535         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1536         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1537         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1538         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1539
1540         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1541         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1542
1543         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1544         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1545
1546         mutex_lock(&adev->grbm_idx_mutex);
1547         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1548         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1549         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1550
1551         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1552         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1553         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1554         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1555         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1556
1557         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1558         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1559         data &= 0x0000FFFF;
1560         data |= 0x00C00000;
1561         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1562
1563         /*
1564          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1565          * programmed in gfx_v9_0_init_always_on_cu_mask()
1566          */
1567
1568         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1569          * but is used here for RLC_LB_CNTL configuration */
1570         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1571         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1572         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1573         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1574         mutex_unlock(&adev->grbm_idx_mutex);
1575
1576         gfx_v9_0_init_always_on_cu_mask(adev);
1577 }
1578
1579 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1580 {
1581         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1582 }
1583
1584 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1585 {
1586         return 5;
1587 }
1588
1589 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1590 {
1591         const struct cs_section_def *cs_data;
1592         int r;
1593
1594         adev->gfx.rlc.cs_data = gfx9_cs_data;
1595
1596         cs_data = adev->gfx.rlc.cs_data;
1597
1598         if (cs_data) {
1599                 /* init clear state block */
1600                 r = amdgpu_gfx_rlc_init_csb(adev);
1601                 if (r)
1602                         return r;
1603         }
1604
1605         if (adev->asic_type == CHIP_RAVEN) {
1606                 /* TODO: double check the cp_table_size for RV */
1607                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1608                 r = amdgpu_gfx_rlc_init_cpt(adev);
1609                 if (r)
1610                         return r;
1611         }
1612
1613         switch (adev->asic_type) {
1614         case CHIP_RAVEN:
1615                 gfx_v9_0_init_lbpw(adev);
1616                 break;
1617         case CHIP_VEGA20:
1618                 gfx_v9_4_init_lbpw(adev);
1619                 break;
1620         default:
1621                 break;
1622         }
1623
1624         return 0;
1625 }
1626
1627 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1628 {
1629         int r;
1630
1631         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1632         if (unlikely(r != 0))
1633                 return r;
1634
1635         r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1636                         AMDGPU_GEM_DOMAIN_VRAM);
1637         if (!r)
1638                 adev->gfx.rlc.clear_state_gpu_addr =
1639                         amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1640
1641         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1642
1643         return r;
1644 }
1645
1646 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1647 {
1648         int r;
1649
1650         if (!adev->gfx.rlc.clear_state_obj)
1651                 return;
1652
1653         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1654         if (likely(r == 0)) {
1655                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1656                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1657         }
1658 }
1659
1660 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1661 {
1662         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1663         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1664 }
1665
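/*
 * Take ownership of the compute queues, allocate and clear the HPD/EOP
 * buffer in VRAM, and copy the MEC microcode into a GTT buffer object
 * so the CP can fetch it.
 */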
1666 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1667 {
1668         int r;
1669         u32 *hpd;
1670         const __le32 *fw_data;
1671         unsigned fw_size;
1672         u32 *fw;
1673         size_t mec_hpd_size;
1674
1675         const struct gfx_firmware_header_v1_0 *mec_hdr;
1676
1677         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1678
1679         /* take ownership of the relevant compute queues */
1680         amdgpu_gfx_compute_queue_acquire(adev);
1681         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1682
1683         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1684                                       AMDGPU_GEM_DOMAIN_VRAM,
1685                                       &adev->gfx.mec.hpd_eop_obj,
1686                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1687                                       (void **)&hpd);
1688         if (r) {
1689                 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1690                 gfx_v9_0_mec_fini(adev);
1691                 return r;
1692         }
1693
1694         memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1695
1696         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1697         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1698
1699         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1700
1701         fw_data = (const __le32 *)
1702                 (adev->gfx.mec_fw->data +
1703                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1704         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1705
1706         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1707                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1708                                       &adev->gfx.mec.mec_fw_obj,
1709                                       &adev->gfx.mec.mec_fw_gpu_addr,
1710                                       (void **)&fw);
1711         if (r) {
1712                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1713                 gfx_v9_0_mec_fini(adev);
1714                 return r;
1715         }
1716
1717         memcpy(fw, fw_data, fw_size);
1718
1719         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1720         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1721
1722         return 0;
1723 }
1724
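/*
 * Wave debug helpers: read wave state and GPRs through the SQ indirect
 * register interface (SQ_IND_INDEX/SQ_IND_DATA).
 */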
1725 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1726 {
1727         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1728                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1729                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1730                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1731                 (SQ_IND_INDEX__FORCE_READ_MASK));
1732         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1733 }
1734
1735 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1736                            uint32_t wave, uint32_t thread,
1737                            uint32_t regno, uint32_t num, uint32_t *out)
1738 {
1739         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1740                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1741                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1742                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1743                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1744                 (SQ_IND_INDEX__FORCE_READ_MASK) |
1745                 (SQ_IND_INDEX__AUTO_INCR_MASK));
1746         while (num--)
1747                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1748 }
1749
1750 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1751 {
1752         /* type 1 wave data */
1753         dst[(*no_fields)++] = 1;
1754         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1755         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1756         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1757         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1758         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1759         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1760         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1761         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1762         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1763         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1764         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1765         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1766         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1767         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1768 }
1769
1770 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1771                                      uint32_t wave, uint32_t start,
1772                                      uint32_t size, uint32_t *dst)
1773 {
1774         wave_read_regs(
1775                 adev, simd, wave, 0,
1776                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1777 }
1778
1779 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1780                                      uint32_t wave, uint32_t thread,
1781                                      uint32_t start, uint32_t size,
1782                                      uint32_t *dst)
1783 {
1784         wave_read_regs(
1785                 adev, simd, wave, thread,
1786                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1787 }
1788
1789 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1790                                   u32 me, u32 pipe, u32 q, u32 vm)
1791 {
1792         soc15_grbm_select(adev, me, pipe, q, vm);
1793 }
1794
1795 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1796         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1797         .select_se_sh = &gfx_v9_0_select_se_sh,
1798         .read_wave_data = &gfx_v9_0_read_wave_data,
1799         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1800         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1801         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1802         .ras_error_inject = &gfx_v9_0_ras_error_inject,
1803         .query_ras_error_count = &gfx_v9_0_query_ras_error_count
1804 };
1805
1806 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1807 {
1808         u32 gb_addr_config;
1809         int err;
1810
1811         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1812
1813         switch (adev->asic_type) {
1814         case CHIP_VEGA10:
1815                 adev->gfx.config.max_hw_contexts = 8;
1816                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1817                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1818                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1819                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1820                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1821                 break;
1822         case CHIP_VEGA12:
1823                 adev->gfx.config.max_hw_contexts = 8;
1824                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1825                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1826                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1827                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1828                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1829                 DRM_INFO("fix gfx.config for vega12\n");
1830                 break;
1831         case CHIP_VEGA20:
1832                 adev->gfx.config.max_hw_contexts = 8;
1833                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1834                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1835                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1836                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1837                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1838                 gb_addr_config &= ~0xf3e777ff;
1839                 gb_addr_config |= 0x22014042;
1840                 /* check vbios table if gpu info is not available */
1841                 err = amdgpu_atomfirmware_get_gfx_info(adev);
1842                 if (err)
1843                         return err;
1844                 break;
1845         case CHIP_RAVEN:
1846                 adev->gfx.config.max_hw_contexts = 8;
1847                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1848                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1849                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1850                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1851                 if (adev->rev_id >= 8)
1852                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1853                 else
1854                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1855                 break;
1856         case CHIP_ARCTURUS:
1857                 adev->gfx.config.max_hw_contexts = 8;
1858                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1859                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1860                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1861                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1862                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1863                 gb_addr_config &= ~0xf3e777ff;
1864                 gb_addr_config |= 0x22014042;
1865                 break;
1866         default:
1867                 BUG();
1868                 break;
1869         }
1870
1871         adev->gfx.config.gb_addr_config = gb_addr_config;
1872
1873         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1874                         REG_GET_FIELD(
1875                                         adev->gfx.config.gb_addr_config,
1876                                         GB_ADDR_CONFIG,
1877                                         NUM_PIPES);
1878
1879         adev->gfx.config.max_tile_pipes =
1880                 adev->gfx.config.gb_addr_config_fields.num_pipes;
1881
1882         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1883                         REG_GET_FIELD(
1884                                         adev->gfx.config.gb_addr_config,
1885                                         GB_ADDR_CONFIG,
1886                                         NUM_BANKS);
1887         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1888                         REG_GET_FIELD(
1889                                         adev->gfx.config.gb_addr_config,
1890                                         GB_ADDR_CONFIG,
1891                                         MAX_COMPRESSED_FRAGS);
1892         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1893                         REG_GET_FIELD(
1894                                         adev->gfx.config.gb_addr_config,
1895                                         GB_ADDR_CONFIG,
1896                                         NUM_RB_PER_SE);
1897         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1898                         REG_GET_FIELD(
1899                                         adev->gfx.config.gb_addr_config,
1900                                         GB_ADDR_CONFIG,
1901                                         NUM_SHADER_ENGINES);
1902         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1903                         REG_GET_FIELD(
1904                                         adev->gfx.config.gb_addr_config,
1905                                         GB_ADDR_CONFIG,
1906                                         PIPE_INTERLEAVE_SIZE));
1907
1908         return 0;
1909 }
1910
1911 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1912                                    struct amdgpu_ngg_buf *ngg_buf,
1913                                    int size_se,
1914                                    int default_size_se)
1915 {
1916         int r;
1917
1918         if (size_se < 0) {
1919                 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1920                 return -EINVAL;
1921         }
1922         size_se = size_se ? size_se : default_size_se;
1923
1924         ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1925         r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1926                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1927                                     &ngg_buf->bo,
1928                                     &ngg_buf->gpu_addr,
1929                                     NULL);
1930         if (r) {
1931                 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1932                 return r;
1933         }
1934         ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1935
1936         return r;
1937 }
1938
1939 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1940 {
1941         int i;
1942
1943         for (i = 0; i < NGG_BUF_MAX; i++)
1944                 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1945                                       &adev->gfx.ngg.buf[i].gpu_addr,
1946                                       NULL);
1947
1948         memset(&adev->gfx.ngg.buf[0], 0,
1949                         sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1950
1951         adev->gfx.ngg.init = false;
1952
1953         return 0;
1954 }
1955
1956 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
1957 {
1958         int r;
1959
1960         if (!amdgpu_ngg || adev->gfx.ngg.init)
1961                 return 0;
1962
1963         /* GDS reserve memory: 64 bytes alignment */
1964         adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
1965         adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
1966         adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
1967         adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
1968
1969         /* Primitive Buffer */
1970         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
1971                                     amdgpu_prim_buf_per_se,
1972                                     64 * 1024);
1973         if (r) {
1974                 dev_err(adev->dev, "Failed to create Primitive Buffer\n");
1975                 goto err;
1976         }
1977
1978         /* Position Buffer */
1979         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
1980                                     amdgpu_pos_buf_per_se,
1981                                     256 * 1024);
1982         if (r) {
1983                 dev_err(adev->dev, "Failed to create Position Buffer\n");
1984                 goto err;
1985         }
1986
1987         /* Control Sideband */
1988         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
1989                                     amdgpu_cntl_sb_buf_per_se,
1990                                     256);
1991         if (r) {
1992                 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
1993                 goto err;
1994         }
1995
1996         /* Parameter Cache, not created by default */
1997         if (amdgpu_param_buf_per_se <= 0)
1998                 goto out;
1999
2000         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
2001                                     amdgpu_param_buf_per_se,
2002                                     512 * 1024);
2003         if (r) {
2004                 dev_err(adev->dev, "Failed to create Parameter Cache\n");
2005                 goto err;
2006         }
2007
2008 out:
2009         adev->gfx.ngg.init = true;
2010         return 0;
2011 err:
2012         gfx_v9_0_ngg_fini(adev);
2013         return r;
2014 }
2015
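/*
 * Enable NGG: program the NGG buffer sizes and base addresses, then
 * clear the GDS range reserved for NGG with a DMA_DATA packet on the
 * gfx ring.
 */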
2016 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
2017 {
2018         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2019         int r;
2020         u32 data, base;
2021
2022         if (!amdgpu_ngg)
2023                 return 0;
2024
2025         /* Program buffer size */
2026         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
2027                              adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
2028         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
2029                              adev->gfx.ngg.buf[NGG_POS].size >> 8);
2030         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
2031
2032         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
2033                              adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
2034         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
2035                              adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
2036         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
2037
2038         /* Program buffer base address */
2039         base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2040         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
2041         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
2042
2043         base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2044         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
2045         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
2046
2047         base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2048         data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
2049         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
2050
2051         base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2052         data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
2053         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
2054
2055         base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2056         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
2057         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
2058
2059         base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2060         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
2061         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
2062
2063         /* Clear GDS reserved memory */
2064         r = amdgpu_ring_alloc(ring, 17);
2065         if (r) {
2066                 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
2067                           ring->name, r);
2068                 return r;
2069         }
2070
2071         gfx_v9_0_write_data_to_reg(ring, 0, false,
2072                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
2073                                    (adev->gds.gds_size +
2074                                     adev->gfx.ngg.gds_reserve_size));
2075
2076         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
2077         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
2078                                 PACKET3_DMA_DATA_DST_SEL(1) |
2079                                 PACKET3_DMA_DATA_SRC_SEL(2)));
2080         amdgpu_ring_write(ring, 0);
2081         amdgpu_ring_write(ring, 0);
2082         amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
2083         amdgpu_ring_write(ring, 0);
2084         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
2085                                 adev->gfx.ngg.gds_reserve_size);
2086
2087         gfx_v9_0_write_data_to_reg(ring, 0, false,
2088                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
2089
2090         amdgpu_ring_commit(ring);
2091
2092         return 0;
2093 }
2094
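/*
 * Set up one compute ring: map it to a MEC/pipe/queue, assign its
 * doorbell and HPD EOP address, and hook it to the matching EOP
 * interrupt source.
 */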
2095 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2096                                       int mec, int pipe, int queue)
2097 {
2098         int r;
2099         unsigned irq_type;
2100         struct amdgpu_ring *ring;
2101
2102         ring = &adev->gfx.compute_ring[ring_id];
2103
2104         /* mec0 is me1 */
2105         ring->me = mec + 1;
2106         ring->pipe = pipe;
2107         ring->queue = queue;
2108
2109         ring->ring_obj = NULL;
2110         ring->use_doorbell = true;
2111         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2112         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2113                                 + (ring_id * GFX9_MEC_HPD_SIZE);
2114         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2115
2116         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2117                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2118                 + ring->pipe;
2119
2120         /* type-2 packets are deprecated on MEC, use type-3 instead */
2121         r = amdgpu_ring_init(adev, ring, 1024,
2122                              &adev->gfx.eop_irq, irq_type);
2123         if (r)
2124                 return r;
2125
2126
2127         return 0;
2128 }
2129
2130 static int gfx_v9_0_sw_init(void *handle)
2131 {
2132         int i, j, k, r, ring_id;
2133         struct amdgpu_ring *ring;
2134         struct amdgpu_kiq *kiq;
2135         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2136
2137         switch (adev->asic_type) {
2138         case CHIP_VEGA10:
2139         case CHIP_VEGA12:
2140         case CHIP_VEGA20:
2141         case CHIP_RAVEN:
2142         case CHIP_ARCTURUS:
2143                 adev->gfx.mec.num_mec = 2;
2144                 break;
2145         default:
2146                 adev->gfx.mec.num_mec = 1;
2147                 break;
2148         }
2149
2150         adev->gfx.mec.num_pipe_per_mec = 4;
2151         adev->gfx.mec.num_queue_per_pipe = 8;
2152
2153         /* EOP Event */
2154         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2155         if (r)
2156                 return r;
2157
2158         /* Privileged reg */
2159         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2160                               &adev->gfx.priv_reg_irq);
2161         if (r)
2162                 return r;
2163
2164         /* Privileged inst */
2165         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2166                               &adev->gfx.priv_inst_irq);
2167         if (r)
2168                 return r;
2169
2170         /* ECC error */
2171         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2172                               &adev->gfx.cp_ecc_error_irq);
2173         if (r)
2174                 return r;
2175
2176         /* FUE error */
2177         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2178                               &adev->gfx.cp_ecc_error_irq);
2179         if (r)
2180                 return r;
2181
2182         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2183
2184         gfx_v9_0_scratch_init(adev);
2185
2186         r = gfx_v9_0_init_microcode(adev);
2187         if (r) {
2188                 DRM_ERROR("Failed to load gfx firmware!\n");
2189                 return r;
2190         }
2191
2192         r = adev->gfx.rlc.funcs->init(adev);
2193         if (r) {
2194                 DRM_ERROR("Failed to init rlc BOs!\n");
2195                 return r;
2196         }
2197
2198         r = gfx_v9_0_mec_init(adev);
2199         if (r) {
2200                 DRM_ERROR("Failed to init MEC BOs!\n");
2201                 return r;
2202         }
2203
2204         /* set up the gfx ring */
2205         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2206                 ring = &adev->gfx.gfx_ring[i];
2207                 ring->ring_obj = NULL;
2208                 if (!i)
2209                         sprintf(ring->name, "gfx");
2210                 else
2211                         sprintf(ring->name, "gfx_%d", i);
2212                 ring->use_doorbell = true;
2213                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2214                 r = amdgpu_ring_init(adev, ring, 1024,
2215                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2216                 if (r)
2217                         return r;
2218         }
2219
2220         /* set up the compute queues - allocate horizontally across pipes */
2221         ring_id = 0;
2222         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2223                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2224                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2225                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2226                                         continue;
2227
2228                                 r = gfx_v9_0_compute_ring_init(adev,
2229                                                                ring_id,
2230                                                                i, k, j);
2231                                 if (r)
2232                                         return r;
2233
2234                                 ring_id++;
2235                         }
2236                 }
2237         }
2238
2239         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2240         if (r) {
2241                 DRM_ERROR("Failed to init KIQ BOs!\n");
2242                 return r;
2243         }
2244
2245         kiq = &adev->gfx.kiq;
2246         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2247         if (r)
2248                 return r;
2249
2250         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2251         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2252         if (r)
2253                 return r;
2254
2255         adev->gfx.ce_ram_size = 0x8000;
2256
2257         r = gfx_v9_0_gpu_early_init(adev);
2258         if (r)
2259                 return r;
2260
2261         r = gfx_v9_0_ngg_init(adev);
2262         if (r)
2263                 return r;
2264
2265         return 0;
2266 }
2267
2268
2269 static int gfx_v9_0_sw_fini(void *handle)
2270 {
2271         int i;
2272         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2273
2274         if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
2275                         adev->gfx.ras_if) {
2276                 struct ras_common_if *ras_if = adev->gfx.ras_if;
2277                 struct ras_ih_if ih_info = {
2278                         .head = *ras_if,
2279                 };
2280
2281                 amdgpu_ras_debugfs_remove(adev, ras_if);
2282                 amdgpu_ras_sysfs_remove(adev, ras_if);
2283                 amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
2284                 amdgpu_ras_feature_enable(adev, ras_if, 0);
2285                 kfree(ras_if);
2286         }
2287
2288         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2289                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2290         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2291                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2292
2293         amdgpu_gfx_mqd_sw_fini(adev);
2294         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2295         amdgpu_gfx_kiq_fini(adev);
2296
2297         gfx_v9_0_mec_fini(adev);
2298         gfx_v9_0_ngg_fini(adev);
2299         amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2300         if (adev->asic_type == CHIP_RAVEN) {
2301                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2302                                 &adev->gfx.rlc.cp_table_gpu_addr,
2303                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2304         }
2305         gfx_v9_0_free_microcode(adev);
2306
2307         return 0;
2308 }
2309
2310
2311 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2312 {
2313         /* TODO */
2314 }
2315
2316 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2317 {
2318         u32 data;
2319
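        /* a value of 0xffffffff selects broadcast writes to all instances/SEs/SHs */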
2320         if (instance == 0xffffffff)
2321                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2322         else
2323                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2324
2325         if (se_num == 0xffffffff)
2326                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2327         else
2328                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2329
2330         if (sh_num == 0xffffffff)
2331                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2332         else
2333                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2334
2335         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2336 }
2337
2338 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2339 {
2340         u32 data, mask;
2341
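        /*
         * CC_RB_BACKEND_DISABLE and GC_USER_RB_BACKEND_DISABLE together give the
         * disabled render backends (assumed to be hardware-harvested vs. user/driver
         * disabled); the active bitmap is their inverse within one SH's worth of RBs.
         */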
2342         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2343         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2344
2345         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2346         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2347
2348         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2349                                          adev->gfx.config.max_sh_per_se);
2350
2351         return (~data) & mask;
2352 }
2353
2354 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2355 {
2356         int i, j;
2357         u32 data;
2358         u32 active_rbs = 0;
2359         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2360                                         adev->gfx.config.max_sh_per_se;
2361
2362         mutex_lock(&adev->grbm_idx_mutex);
2363         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2364                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2365                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2366                         data = gfx_v9_0_get_rb_active_bitmap(adev);
2367                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2368                                                rb_bitmap_width_per_sh);
2369                 }
2370         }
2371         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2372         mutex_unlock(&adev->grbm_idx_mutex);
2373
2374         adev->gfx.config.backend_enable_mask = active_rbs;
2375         adev->gfx.config.num_rbs = hweight32(active_rbs);
2376 }
2377
2378 #define DEFAULT_SH_MEM_BASES    (0x6000)
2379 #define FIRST_COMPUTE_VMID      (8)
2380 #define LAST_COMPUTE_VMID       (16)
2381 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2382 {
2383         int i;
2384         uint32_t sh_mem_config;
2385         uint32_t sh_mem_bases;
2386
2387         /*
2388          * Configure apertures:
2389          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2390          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2391          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2392          */
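        /* SH_MEM_BASES packs the private aperture base in bits [15:0] and the
         * shared aperture base in bits [31:16] (matching the PRIVATE_BASE/SHARED_BASE
         * fields programmed elsewhere in this file), hence the duplicated value. */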
2393         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2394
2395         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2396                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2397                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2398
2399         mutex_lock(&adev->srbm_mutex);
2400         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2401                 soc15_grbm_select(adev, 0, 0, 0, i);
2402                 /* CP and shaders */
2403                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2404                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2405         }
2406         soc15_grbm_select(adev, 0, 0, 0, 0);
2407         mutex_unlock(&adev->srbm_mutex);
2408 }
2409
2410 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2411 {
2412         int vmid;
2413
2414         /*
2415          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2416          * access. Compute VMIDs should be enabled by FW for target VMIDs,
2417          * the driver can enable them for graphics. VMID0 should maintain
2418          * access so that HWS firmware can save/restore entries.
2419          */
2420         for (vmid = 1; vmid < 16; vmid++) {
2421                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2422                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2423                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2424                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2425         }
2426 }
2427
2428 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2429 {
2430         u32 tmp;
2431         int i;
2432
2433         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2434
2435         gfx_v9_0_tiling_mode_table_init(adev);
2436
2437         gfx_v9_0_setup_rb(adev);
2438         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2439         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2440
2441         /* XXX SH_MEM regs */
2442         /* where to put LDS, scratch, GPUVM in FSA64 space */
2443         mutex_lock(&adev->srbm_mutex);
2444         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2445                 soc15_grbm_select(adev, 0, 0, 0, i);
2446                 /* CP and shaders */
2447                 if (i == 0) {
2448                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2449                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2450                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2451                                             !!amdgpu_noretry);
2452                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2453                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2454                 } else {
2455                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2456                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2457                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2458                                             !!amdgpu_noretry);
2459                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2460                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2461                                 (adev->gmc.private_aperture_start >> 48));
2462                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2463                                 (adev->gmc.shared_aperture_start >> 48));
2464                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2465                 }
2466         }
2467         soc15_grbm_select(adev, 0, 0, 0, 0);
2468
2469         mutex_unlock(&adev->srbm_mutex);
2470
2471         gfx_v9_0_init_compute_vmid(adev);
2472         gfx_v9_0_init_gds_vmid(adev);
2473 }
2474
2475 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2476 {
2477         u32 i, j, k;
2478         u32 mask;
2479
2480         mutex_lock(&adev->grbm_idx_mutex);
2481         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2482                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2483                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2484                         for (k = 0; k < adev->usec_timeout; k++) {
2485                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2486                                         break;
2487                                 udelay(1);
2488                         }
2489                         if (k == adev->usec_timeout) {
2490                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
2491                                                       0xffffffff, 0xffffffff);
2492                                 mutex_unlock(&adev->grbm_idx_mutex);
2493                                 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2494                                          i, j);
2495                                 return;
2496                         }
2497                 }
2498         }
2499         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2500         mutex_unlock(&adev->grbm_idx_mutex);
2501
2502         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2503                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2504                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2505                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2506         for (k = 0; k < adev->usec_timeout; k++) {
2507                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2508                         break;
2509                 udelay(1);
2510         }
2511 }
2512
2513 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2514                                                bool enable)
2515 {
2516         u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2517
2518         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2519         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2520         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2521         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2522
2523         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2524 }
2525
2526 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2527 {
2528         /* csib */
2529         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2530                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2531         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2532                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2533         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2534                         adev->gfx.rlc.clear_state_size);
2535 }
2536
2537 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2538                                 int indirect_offset,
2539                                 int list_size,
2540                                 int *unique_indirect_regs,
2541                                 int unique_indirect_reg_count,
2542                                 int *indirect_start_offsets,
2543                                 int *indirect_start_offsets_count,
2544                                 int max_start_offsets_count)
2545 {
2546         int idx;
2547
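        /*
         * Walk the indirect portion of the register list: each group is a run of
         * three-dword entries whose third dword is an indirect register offset,
         * terminated by a 0xFFFFFFFF sentinel (layout inferred from the parsing below).
         */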
2548         for (; indirect_offset < list_size; indirect_offset++) {
2549                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2550                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2551                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2552
2553                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2554                         indirect_offset += 2;
2555
2556                         /* look for the matching index */
2557                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2558                                 if (unique_indirect_regs[idx] ==
2559                                         register_list_format[indirect_offset] ||
2560                                         !unique_indirect_regs[idx])
2561                                         break;
2562                         }
2563
2564                         BUG_ON(idx >= unique_indirect_reg_count);
2565
2566                         if (!unique_indirect_regs[idx])
2567                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2568
2569                         indirect_offset++;
2570                 }
2571         }
2572 }
2573
2574 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2575 {
2576         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2577         int unique_indirect_reg_count = 0;
2578
2579         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2580         int indirect_start_offsets_count = 0;
2581
2582         int list_size = 0;
2583         int i = 0, j = 0;
2584         u32 tmp = 0;
2585
2586         u32 *register_list_format =
2587                 kmemdup(adev->gfx.rlc.register_list_format,
2588                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2589         if (!register_list_format)
2590                 return -ENOMEM;
2591
2592         /* setup unique_indirect_regs array and indirect_start_offsets array */
2593         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2594         gfx_v9_1_parse_ind_reg_list(register_list_format,
2595                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2596                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2597                                     unique_indirect_regs,
2598                                     unique_indirect_reg_count,
2599                                     indirect_start_offsets,
2600                                     &indirect_start_offsets_count,
2601                                     ARRAY_SIZE(indirect_start_offsets));
2602
2603         /* enable auto inc in case it is disabled */
2604         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2605         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2606         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2607
2608         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2609         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2610                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2611         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2612                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2613                         adev->gfx.rlc.register_restore[i]);
2614
2615         /* load indirect register */
2616         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2617                 adev->gfx.rlc.reg_list_format_start);
2618
2619         /* direct register portion */
2620         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2621                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2622                         register_list_format[i]);
2623
2624         /* indirect register portion */
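        /* each indirect entry below is three dwords; the third dword (a register
         * offset) is replaced by its index into unique_indirect_regs */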
2625         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2626                 if (register_list_format[i] == 0xFFFFFFFF) {
2627                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2628                         continue;
2629                 }
2630
2631                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2632                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2633
2634                 for (j = 0; j < unique_indirect_reg_count; j++) {
2635                         if (register_list_format[i] == unique_indirect_regs[j]) {
2636                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2637                                 break;
2638                         }
2639                 }
2640
2641                 BUG_ON(j >= unique_indirect_reg_count);
2642
2643                 i++;
2644         }
2645
2646         /* set save/restore list size */
2647         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2648         list_size = list_size >> 1;
2649         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2650                 adev->gfx.rlc.reg_restore_list_size);
2651         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2652
2653         /* write the starting offsets to RLC scratch ram */
2654         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2655                 adev->gfx.rlc.starting_offsets_start);
2656         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2657                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2658                        indirect_start_offsets[i]);
2659
2660         /* load unique indirect regs */
2661         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2662                 if (unique_indirect_regs[i] != 0) {
2663                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2664                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2665                                unique_indirect_regs[i] & 0x3FFFF);
2666
2667                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2668                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2669                                unique_indirect_regs[i] >> 20);
2670                 }
2671         }
2672
2673         kfree(register_list_format);
2674         return 0;
2675 }
2676
2677 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2678 {
2679         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2680 }
2681
2682 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2683                                              bool enable)
2684 {
2685         uint32_t data = 0;
2686         uint32_t default_data = 0;
2687
2688         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2689         if (enable) {
2690                 /* enable GFXIP control over CGPG */
2691                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2692                 if (default_data != data)
2693                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2694
2695                 /* update status */
2696                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2697                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2698                 if (default_data != data)
2699                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2700         } else {
2701                 /* restore GFXIP control over CGPG */
2702                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2703                 if (default_data != data)
2704                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2705         }
2706 }
2707
2708 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2709 {
2710         uint32_t data = 0;
2711
2712         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2713                               AMD_PG_SUPPORT_GFX_SMG |
2714                               AMD_PG_SUPPORT_GFX_DMG)) {
2715                 /* init IDLE_POLL_COUNT = 60 */
2716                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2717                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2718                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2719                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2720
2721                 /* init RLC PG Delay */
2722                 data = 0;
2723                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2724                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2725                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2726                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2727                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2728
2729                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2730                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2731                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2732                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2733
2734                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2735                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2736                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2737                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2738
2739                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2740                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2741
2742                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2743                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2744                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2745
2746                 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2747         }
2748 }
2749
2750 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2751                                                 bool enable)
2752 {
2753         uint32_t data = 0;
2754         uint32_t default_data = 0;
2755
2756         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2757         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2758                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2759                              enable ? 1 : 0);
2760         if (default_data != data)
2761                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2762 }
2763
2764 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2765                                                 bool enable)
2766 {
2767         uint32_t data = 0;
2768         uint32_t default_data = 0;
2769
2770         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2771         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2772                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2773                              enable ? 1 : 0);
2774         if (default_data != data)
2775                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2776 }
2777
2778 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2779                                         bool enable)
2780 {
2781         uint32_t data = 0;
2782         uint32_t default_data = 0;
2783
2784         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2785         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2786                              CP_PG_DISABLE,
2787                              enable ? 0 : 1);
2788         if (default_data != data)
2789                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2790 }
2791
2792 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2793                                                 bool enable)
2794 {
2795         uint32_t data, default_data;
2796
2797         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2798         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2799                              GFX_POWER_GATING_ENABLE,
2800                              enable ? 1 : 0);
2801         if (default_data != data)
2802                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2803 }
2804
2805 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2806                                                 bool enable)
2807 {
2808         uint32_t data, default_data;
2809
2810         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2811         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2812                              GFX_PIPELINE_PG_ENABLE,
2813                              enable ? 1 : 0);
2814         if (default_data != data)
2815                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2816
2817         if (!enable)
2818                 /* read any GFX register to wake up GFX */
2819                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2820 }
2821
2822 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2823                                                        bool enable)
2824 {
2825         uint32_t data, default_data;
2826
2827         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2828         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2829                              STATIC_PER_CU_PG_ENABLE,
2830                              enable ? 1 : 0);
2831         if (default_data != data)
2832                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2833 }
2834
2835 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2836                                                 bool enable)
2837 {
2838         uint32_t data, default_data;
2839
2840         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2841         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2842                              DYN_PER_CU_PG_ENABLE,
2843                              enable ? 1 : 0);
2844         if (default_data != data)
2845                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2846 }
2847
2848 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2849 {
2850         gfx_v9_0_init_csb(adev);
2851
2852         /*
2853          * The RLC save/restore list is only available from RLC v2_1 onward,
2854          * and it is required by the gfxoff feature.
2855          */
2856         if (adev->gfx.rlc.is_rlc_v2_1) {
2857                 gfx_v9_1_init_rlc_save_restore_list(adev);
2858                 gfx_v9_0_enable_save_restore_machine(adev);
2859         }
2860
2861         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2862                               AMD_PG_SUPPORT_GFX_SMG |
2863                               AMD_PG_SUPPORT_GFX_DMG |
2864                               AMD_PG_SUPPORT_CP |
2865                               AMD_PG_SUPPORT_GDS |
2866                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
2867                 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2868                        adev->gfx.rlc.cp_table_gpu_addr >> 8);
2869                 gfx_v9_0_init_gfx_power_gating(adev);
2870         }
2871 }
2872
2873 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2874 {
2875         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2876         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2877         gfx_v9_0_wait_for_rlc_serdes(adev);
2878 }
2879
2880 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2881 {
2882         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2883         udelay(50);
2884         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2885         udelay(50);
2886 }
2887
2888 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2889 {
2890 #ifdef AMDGPU_RLC_DEBUG_RETRY
2891         u32 rlc_ucode_ver;
2892 #endif
2893
2894         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2895         udelay(50);
2896
2897         /* APUs (e.g. Carrizo) enable the CP interrupt only after the CP has been initialized */
2898         if (!(adev->flags & AMD_IS_APU)) {
2899                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2900                 udelay(50);
2901         }
2902
2903 #ifdef AMDGPU_RLC_DEBUG_RETRY
2904         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2905         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2906         if (rlc_ucode_ver == 0x108) {
2907                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2908                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2909                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2910                  * default is 0x9C4 to create a 100us interval */
2911                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2912                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2913                  * to disable the page fault retry interrupts, default is
2914                  * 0x100 (256) */
2915                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2916         }
2917 #endif
2918 }
2919
2920 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2921 {
2922         const struct rlc_firmware_header_v2_0 *hdr;
2923         const __le32 *fw_data;
2924         unsigned i, fw_size;
2925
2926         if (!adev->gfx.rlc_fw)
2927                 return -EINVAL;
2928
2929         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2930         amdgpu_ucode_print_rlc_hdr(&hdr->header);
2931
2932         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2933                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2934         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2935
2936         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2937                         RLCG_UCODE_LOADING_START_ADDRESS);
2938         for (i = 0; i < fw_size; i++)
2939                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2940         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2941
2942         return 0;
2943 }
2944
2945 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2946 {
2947         int r;
2948
2949         if (amdgpu_sriov_vf(adev)) {
2950                 gfx_v9_0_init_csb(adev);
2951                 return 0;
2952         }
2953
2954         adev->gfx.rlc.funcs->stop(adev);
2955
2956         /* disable CG */
2957         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2958
2959         gfx_v9_0_init_pg(adev);
2960
2961         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2962                 /* legacy rlc firmware loading */
2963                 r = gfx_v9_0_rlc_load_microcode(adev);
2964                 if (r)
2965                         return r;
2966         }
2967
2968         switch (adev->asic_type) {
2969         case CHIP_RAVEN:
2970                 if (amdgpu_lbpw == 0)
2971                         gfx_v9_0_enable_lbpw(adev, false);
2972                 else
2973                         gfx_v9_0_enable_lbpw(adev, true);
2974                 break;
2975         case CHIP_VEGA20:
2976                 if (amdgpu_lbpw > 0)
2977                         gfx_v9_0_enable_lbpw(adev, true);
2978                 else
2979                         gfx_v9_0_enable_lbpw(adev, false);
2980                 break;
2981         default:
2982                 break;
2983         }
2984
2985         adev->gfx.rlc.funcs->start(adev);
2986
2987         return 0;
2988 }
2989
2990 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2991 {
2992         int i;
2993         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2994
2995         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2996         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2997         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2998         if (!enable) {
2999                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3000                         adev->gfx.gfx_ring[i].sched.ready = false;
3001         }
3002         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3003         udelay(50);
3004 }
3005
3006 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3007 {
3008         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3009         const struct gfx_firmware_header_v1_0 *ce_hdr;
3010         const struct gfx_firmware_header_v1_0 *me_hdr;
3011         const __le32 *fw_data;
3012         unsigned i, fw_size;
3013
3014         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3015                 return -EINVAL;
3016
3017         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3018                 adev->gfx.pfp_fw->data;
3019         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3020                 adev->gfx.ce_fw->data;
3021         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3022                 adev->gfx.me_fw->data;
3023
3024         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3025         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3026         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3027
3028         gfx_v9_0_cp_gfx_enable(adev, false);
3029
3030         /* PFP */
3031         fw_data = (const __le32 *)
3032                 (adev->gfx.pfp_fw->data +
3033                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3034         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3035         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3036         for (i = 0; i < fw_size; i++)
3037                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3038         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3039
3040         /* CE */
3041         fw_data = (const __le32 *)
3042                 (adev->gfx.ce_fw->data +
3043                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3044         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3045         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3046         for (i = 0; i < fw_size; i++)
3047                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3048         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3049
3050         /* ME */
3051         fw_data = (const __le32 *)
3052                 (adev->gfx.me_fw->data +
3053                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3054         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3055         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3056         for (i = 0; i < fw_size; i++)
3057                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3058         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3059
3060         return 0;
3061 }
3062
3063 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3064 {
3065         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3066         const struct cs_section_def *sect = NULL;
3067         const struct cs_extent_def *ext = NULL;
3068         int r, i, tmp;
3069
3070         /* init the CP */
3071         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3072         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3073
3074         gfx_v9_0_cp_gfx_enable(adev, true);
3075
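        /* reserve ring space for the clear-state sequence written below; the extra
         * dwords are assumed to cover the packets emitted here that are not part of
         * the clear-state buffer itself */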
3076         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3077         if (r) {
3078                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3079                 return r;
3080         }
3081
3082         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3083         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3084
3085         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3086         amdgpu_ring_write(ring, 0x80000000);
3087         amdgpu_ring_write(ring, 0x80000000);
3088
3089         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3090                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3091                         if (sect->id == SECT_CONTEXT) {
3092                                 amdgpu_ring_write(ring,
3093                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3094                                                ext->reg_count));
3095                                 amdgpu_ring_write(ring,
3096                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3097                                 for (i = 0; i < ext->reg_count; i++)
3098                                         amdgpu_ring_write(ring, ext->extent[i]);
3099                         }
3100                 }
3101         }
3102
3103         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3104         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3105
3106         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3107         amdgpu_ring_write(ring, 0);
3108
3109         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3110         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3111         amdgpu_ring_write(ring, 0x8000);
3112         amdgpu_ring_write(ring, 0x8000);
3113
3114         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3115         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3116                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3117         amdgpu_ring_write(ring, tmp);
3118         amdgpu_ring_write(ring, 0);
3119
3120         amdgpu_ring_commit(ring);
3121
3122         return 0;
3123 }
3124
3125 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3126 {
3127         struct amdgpu_ring *ring;
3128         u32 tmp;
3129         u32 rb_bufsz;
3130         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3131
3132         /* Set the write pointer delay */
3133         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3134
3135         /* set the RB to use vmid 0 */
3136         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3137
3138         /* Set ring buffer size */
3139         ring = &adev->gfx.gfx_ring[0];
3140         rb_bufsz = order_base_2(ring->ring_size / 8);
3141         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3142         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3143 #ifdef __BIG_ENDIAN
3144         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3145 #endif
3146         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3147
3148         /* Initialize the ring buffer's write pointers */
3149         ring->wptr = 0;
3150         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3151         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3152
3153         /* set the wb address whether it's enabled or not */
3154         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3155         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3156         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3157
3158         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3159         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3160         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3161
3162         mdelay(1);
3163         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3164
3165         rb_addr = ring->gpu_addr >> 8;
3166         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3167         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3168
3169         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3170         if (ring->use_doorbell) {
3171                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3172                                     DOORBELL_OFFSET, ring->doorbell_index);
3173                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3174                                     DOORBELL_EN, 1);
3175         } else {
3176                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3177         }
3178         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3179
3180         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3181                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
3182         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3183
3184         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3185                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3186
3187
3188         /* start the ring */
3189         gfx_v9_0_cp_gfx_start(adev);
3190         ring->sched.ready = true;
3191
3192         return 0;
3193 }
3194
3195 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3196 {
3197         int i;
3198
3199         if (enable) {
3200                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3201         } else {
3202                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3203                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3204                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
3205                         adev->gfx.compute_ring[i].sched.ready = false;
3206                 adev->gfx.kiq.ring.sched.ready = false;
3207         }
3208         udelay(50);
3209 }
3210
3211 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3212 {
3213         const struct gfx_firmware_header_v1_0 *mec_hdr;
3214         const __le32 *fw_data;
3215         unsigned i;
3216         u32 tmp;
3217
3218         if (!adev->gfx.mec_fw)
3219                 return -EINVAL;
3220
3221         gfx_v9_0_cp_compute_enable(adev, false);
3222
3223         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3224         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3225
3226         fw_data = (const __le32 *)
3227                 (adev->gfx.mec_fw->data +
3228                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3229         tmp = 0;
3230         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3231         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3232         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3233
3234         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3235                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3236         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3237                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3238
3239         /* MEC1 */
3240         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3241                          mec_hdr->jt_offset);
3242         for (i = 0; i < mec_hdr->jt_size; i++)
3243                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3244                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3245
3246         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3247                         adev->gfx.mec_fw_version);
3248         /* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1 */
3249
3250         return 0;
3251 }
3252
3253 /* KIQ functions */
3254 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3255 {
3256         uint32_t tmp;
3257         struct amdgpu_device *adev = ring->adev;
3258
3259         /* tell RLC which is KIQ queue */
3260         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3261         tmp &= 0xffffff00;
3262         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3263         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
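        /* the second write additionally sets bit 7, which appears to mark the
         * programmed queue as the active KIQ entry */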
3264         tmp |= 0x80;
3265         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3266 }
3267
3268 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
3269 {
3270         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3271         uint64_t queue_mask = 0;
3272         int r, i;
3273
3274         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
3275                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
3276                         continue;
3277
3278                 /* This situation may be hit in the future if a new HW
3279                  * generation exposes more than 64 queues. If so, the
3280                  * definition of queue_mask needs updating */
3281                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
3282                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
3283                         break;
3284                 }
3285
3286                 queue_mask |= (1ull << i);
3287         }
3288
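        /* 8 dwords for the SET_RESOURCES packet plus 7 dwords per MAP_QUEUES packet */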
3289         r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
3290         if (r) {
3291                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3292                 return r;
3293         }
3294
3295         /* set resources */
3296         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
3297         amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
3298                           PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
3299         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
3300         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
3301         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
3302         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
3303         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
3304         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
3305         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3306                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3307                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
3308                 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3309
3310                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
3311                 /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
3312                 amdgpu_ring_write(kiq_ring,
3313                                   PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
3314                                   PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
3315                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
3316                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
3317                                   PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
3318                                   PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
3319                                   PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
3320                                   PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
3321                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
3322                 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
3323                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
3324                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
3325                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
3326                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
3327         }
3328
3329         r = amdgpu_ring_test_helper(kiq_ring);
3330         if (r)
3331                 DRM_ERROR("KCQ enable failed\n");
3332
3333         return r;
3334 }
3335
3336 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3337 {
3338         struct amdgpu_device *adev = ring->adev;
3339         struct v9_mqd *mqd = ring->mqd_ptr;
3340         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3341         uint32_t tmp;
3342
3343         mqd->header = 0xC0310800;
3344         mqd->compute_pipelinestat_enable = 0x00000001;
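        /* default to all CUs enabled on every shader engine for this queue */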
3345         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3346         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3347         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3348         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3349         mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3350         mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3351         mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3352         mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3353         mqd->compute_misc_reserved = 0x00000003;
3354
3355         mqd->dynamic_cu_mask_addr_lo =
3356                 lower_32_bits(ring->mqd_gpu_addr
3357                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3358         mqd->dynamic_cu_mask_addr_hi =
3359                 upper_32_bits(ring->mqd_gpu_addr
3360                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3361
3362         eop_base_addr = ring->eop_gpu_addr >> 8;
3363         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3364         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3365
3366         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3367         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3368         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3369                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3370
3371         mqd->cp_hqd_eop_control = tmp;
3372
3373         /* enable doorbell? */
3374         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3375
3376         if (ring->use_doorbell) {
3377                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3378                                     DOORBELL_OFFSET, ring->doorbell_index);
3379                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3380                                     DOORBELL_EN, 1);
3381                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3382                                     DOORBELL_SOURCE, 0);
3383                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3384                                     DOORBELL_HIT, 0);
3385         } else {
3386                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3387                                          DOORBELL_EN, 0);
3388         }
3389
3390         mqd->cp_hqd_pq_doorbell_control = tmp;
3391
3392         /* disable the queue if it's active */
3393         ring->wptr = 0;
3394         mqd->cp_hqd_dequeue_request = 0;
3395         mqd->cp_hqd_pq_rptr = 0;
3396         mqd->cp_hqd_pq_wptr_lo = 0;
3397         mqd->cp_hqd_pq_wptr_hi = 0;
3398
3399         /* set the pointer to the MQD */
3400         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3401         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3402
3403         /* set MQD vmid to 0 */
3404         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3405         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3406         mqd->cp_mqd_control = tmp;
3407
3408         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3409         hqd_gpu_addr = ring->gpu_addr >> 8;
3410         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3411         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3412
3413         /* set up the HQD, this is similar to CP_RB0_CNTL */
3414         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3415         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3416                             (order_base_2(ring->ring_size / 4) - 1));
3417         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3418                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3419 #ifdef __BIG_ENDIAN
3420         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3421 #endif
3422         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3423         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3424         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3425         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3426         mqd->cp_hqd_pq_control = tmp;
3427
3428         /* set the wb address whether it's enabled or not */
3429         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3430         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3431         mqd->cp_hqd_pq_rptr_report_addr_hi =
3432                 upper_32_bits(wb_gpu_addr) & 0xffff;
3433
3434         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3435         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3436         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3437         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3438
3439         tmp = 0;
3440         /* enable the doorbell if requested */
3441         if (ring->use_doorbell) {
3442                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3443                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3444                                 DOORBELL_OFFSET, ring->doorbell_index);
3445
3446                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3447                                          DOORBELL_EN, 1);
3448                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3449                                          DOORBELL_SOURCE, 0);
3450                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3451                                          DOORBELL_HIT, 0);
3452         }
3453
3454         mqd->cp_hqd_pq_doorbell_control = tmp;
3455
3456         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3457         ring->wptr = 0;
3458         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3459
3460         /* set the vmid for the queue */
3461         mqd->cp_hqd_vmid = 0;
3462
3463         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3464         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3465         mqd->cp_hqd_persistent_state = tmp;
3466
3467         /* set MIN_IB_AVAIL_SIZE */
3468         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3469         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3470         mqd->cp_hqd_ib_control = tmp;
3471
3472         /* activate the queue */
3473         mqd->cp_hqd_active = 1;
3474
3475         return 0;
3476 }
3477
3478 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3479 {
3480         struct amdgpu_device *adev = ring->adev;
3481         struct v9_mqd *mqd = ring->mqd_ptr;
3482         int j;
3483
3484         /* disable wptr polling */
3485         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3486
3487         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3488                mqd->cp_hqd_eop_base_addr_lo);
3489         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3490                mqd->cp_hqd_eop_base_addr_hi);
3491
3492         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3493         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3494                mqd->cp_hqd_eop_control);
3495
3496         /* enable doorbell? */
3497         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3498                mqd->cp_hqd_pq_doorbell_control);
3499
3500         /* disable the queue if it's active */
3501         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3502                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3503                 for (j = 0; j < adev->usec_timeout; j++) {
3504                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3505                                 break;
3506                         udelay(1);
3507                 }
3508                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3509                        mqd->cp_hqd_dequeue_request);
3510                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3511                        mqd->cp_hqd_pq_rptr);
3512                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3513                        mqd->cp_hqd_pq_wptr_lo);
3514                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3515                        mqd->cp_hqd_pq_wptr_hi);
3516         }
3517
3518         /* set the pointer to the MQD */
3519         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3520                mqd->cp_mqd_base_addr_lo);
3521         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3522                mqd->cp_mqd_base_addr_hi);
3523
3524         /* set MQD vmid to 0 */
3525         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3526                mqd->cp_mqd_control);
3527
3528         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3529         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3530                mqd->cp_hqd_pq_base_lo);
3531         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3532                mqd->cp_hqd_pq_base_hi);
3533
3534         /* set up the HQD, this is similar to CP_RB0_CNTL */
3535         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3536                mqd->cp_hqd_pq_control);
3537
3538         /* set the wb address whether it's enabled or not */
3539         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3540                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3541         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3542                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3543
3544         /* only used if CP_PQ_WPTR_POLL_CNTL.EN (CP_PQ_WPTR_POLL_CNTL__EN_MASK) = 1 */
3545         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3546                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3547         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3548                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3549
3550         /* enable the doorbell if requested */
3551         if (ring->use_doorbell) {
3552                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3553                                         (adev->doorbell_index.kiq * 2) << 2);
3554                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3555                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3556         }
3557
3558         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3559                mqd->cp_hqd_pq_doorbell_control);
3560
3561         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3562         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3563                mqd->cp_hqd_pq_wptr_lo);
3564         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3565                mqd->cp_hqd_pq_wptr_hi);
3566
3567         /* set the vmid for the queue */
3568         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3569
3570         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3571                mqd->cp_hqd_persistent_state);
3572
3573         /* activate the queue */
3574         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3575                mqd->cp_hqd_active);
3576
3577         if (ring->use_doorbell)
3578                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3579
3580         return 0;
3581 }
3582
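/* Tear down the KIQ's HQD: request a dequeue, wait for the queue to go
 * inactive (forcing CP_HQD_ACTIVE to 0 if the request times out), then
 * clear the queue registers.
 */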
3583 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3584 {
3585         struct amdgpu_device *adev = ring->adev;
3586         int j;
3587
3588         /* disable the queue if it's active */
3589         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3590
3591                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3592
3593                 for (j = 0; j < adev->usec_timeout; j++) {
3594                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3595                                 break;
3596                         udelay(1);
3597                 }
3598
3599                 if (j == adev->usec_timeout) {
3600                         DRM_DEBUG("KIQ dequeue request failed.\n");
3601
3602                         /* Manual disable if dequeue request times out */
3603                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3604                 }
3605
3606                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3607                       0);
3608         }
3609
3610         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3611         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3612         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3613         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3614         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3615         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3616         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3617         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3618
3619         return 0;
3620 }
3621
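/* Initialize the KIQ. On first init the MQD is built, the HQD registers are
 * programmed and the MQD is backed up in the slot just past the compute
 * rings; on GPU reset the backup is restored and only the registers are
 * reprogrammed.
 */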
3622 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3623 {
3624         struct amdgpu_device *adev = ring->adev;
3625         struct v9_mqd *mqd = ring->mqd_ptr;
3626         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3627
3628         gfx_v9_0_kiq_setting(ring);
3629
3630         if (adev->in_gpu_reset) { /* for GPU_RESET case */
3631                 /* reset MQD to a clean status */
3632                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3633                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3634
3635                 /* reset ring buffer */
3636                 ring->wptr = 0;
3637                 amdgpu_ring_clear_ring(ring);
3638
3639                 mutex_lock(&adev->srbm_mutex);
3640                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3641                 gfx_v9_0_kiq_init_register(ring);
3642                 soc15_grbm_select(adev, 0, 0, 0, 0);
3643                 mutex_unlock(&adev->srbm_mutex);
3644         } else {
3645                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3646                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3647                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3648                 mutex_lock(&adev->srbm_mutex);
3649                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3650                 gfx_v9_0_mqd_init(ring);
3651                 gfx_v9_0_kiq_init_register(ring);
3652                 soc15_grbm_select(adev, 0, 0, 0, 0);
3653                 mutex_unlock(&adev->srbm_mutex);
3654
3655                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3656                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3657         }
3658
3659         return 0;
3660 }
3661
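/* Initialize a user compute queue's MQD. Only the MQD contents are written
 * here; the queue itself is mapped to hardware later by the KIQ (see
 * gfx_v9_0_kiq_kcq_enable() called from gfx_v9_0_kcq_resume()).
 */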
3662 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3663 {
3664         struct amdgpu_device *adev = ring->adev;
3665         struct v9_mqd *mqd = ring->mqd_ptr;
3666         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3667
3668         if (!adev->in_gpu_reset && !adev->in_suspend) {
3669                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3670                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3671                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3672                 mutex_lock(&adev->srbm_mutex);
3673                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3674                 gfx_v9_0_mqd_init(ring);
3675                 soc15_grbm_select(adev, 0, 0, 0, 0);
3676                 mutex_unlock(&adev->srbm_mutex);
3677
3678                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3679                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3680         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3681                 /* reset MQD to a clean status */
3682                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3683                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3684
3685                 /* reset ring buffer */
3686                 ring->wptr = 0;
3687                 amdgpu_ring_clear_ring(ring);
3688         } else {
3689                 amdgpu_ring_clear_ring(ring);
3690         }
3691
3692         return 0;
3693 }
3694
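/* Map the KIQ's MQD buffer, initialize the queue and mark the ring as
 * schedulable.
 */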
3695 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3696 {
3697         struct amdgpu_ring *ring;
3698         int r;
3699
3700         ring = &adev->gfx.kiq.ring;
3701
3702         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3703         if (unlikely(r != 0))
3704                 return r;
3705
3706         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3707         if (unlikely(r != 0)) {
3708                 amdgpu_bo_unreserve(ring->mqd_obj);
                 return r;
         }
3709
3710         gfx_v9_0_kiq_init_queue(ring);
3711         amdgpu_bo_kunmap(ring->mqd_obj);
3712         ring->mqd_ptr = NULL;
3713         amdgpu_bo_unreserve(ring->mqd_obj);
3714         ring->sched.ready = true;
3715         return 0;
3716 }
3717
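/* Bring up the user compute queues: enable the compute CP, initialize each
 * ring's MQD, then have the KIQ map all of them.
 */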
3718 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3719 {
3720         struct amdgpu_ring *ring = NULL;
3721         int r = 0, i;
3722
3723         gfx_v9_0_cp_compute_enable(adev, true);
3724
3725         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3726                 ring = &adev->gfx.compute_ring[i];
3727
3728                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3729                 if (unlikely(r != 0))
3730                         goto done;
3731                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3732                 if (!r) {
3733                         r = gfx_v9_0_kcq_init_queue(ring);
3734                         amdgpu_bo_kunmap(ring->mqd_obj);
3735                         ring->mqd_ptr = NULL;
3736                 }
3737                 amdgpu_bo_unreserve(ring->mqd_obj);
3738                 if (r)
3739                         goto done;
3740         }
3741
3742         r = gfx_v9_0_kiq_kcq_enable(adev);
3743 done:
3744         return r;
3745 }
3746
3747 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3748 {
3749         int r, i;
3750         struct amdgpu_ring *ring;
3751
3752         if (!(adev->flags & AMD_IS_APU))
3753                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3754
3755         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3756                 if (adev->asic_type != CHIP_ARCTURUS) {
3757                         /* legacy firmware loading */
3758                         r = gfx_v9_0_cp_gfx_load_microcode(adev);
3759                         if (r)
3760                                 return r;
3761                 }
3762
3763                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3764                 if (r)
3765                         return r;
3766         }
3767
3768         r = gfx_v9_0_kiq_resume(adev);
3769         if (r)
3770                 return r;
3771
3772         if (adev->asic_type != CHIP_ARCTURUS) {
3773                 r = gfx_v9_0_cp_gfx_resume(adev);
3774                 if (r)
3775                         return r;
3776         }
3777
3778         r = gfx_v9_0_kcq_resume(adev);
3779         if (r)
3780                 return r;
3781
3782         if (adev->asic_type != CHIP_ARCTURUS) {
3783                 ring = &adev->gfx.gfx_ring[0];
3784                 r = amdgpu_ring_test_helper(ring);
3785                 if (r)
3786                         return r;
3787         }
3788
3789         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3790                 ring = &adev->gfx.compute_ring[i];
3791                 amdgpu_ring_test_helper(ring);
3792         }
3793
3794         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3795
3796         return 0;
3797 }
3798
3799 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3800 {
3801         if (adev->asic_type != CHIP_ARCTURUS)
3802                 gfx_v9_0_cp_gfx_enable(adev, enable);
3803         gfx_v9_0_cp_compute_enable(adev, enable);
3804 }
3805
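/* hw_init: program golden registers (bare metal only), initialize GFX
 * constants, pin the clear-state buffer, bring up the RLC and CP, and
 * enable NGG on ASICs that have a gfx ring.
 */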
3806 static int gfx_v9_0_hw_init(void *handle)
3807 {
3808         int r;
3809         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3810
3811         if (!amdgpu_sriov_vf(adev))
3812                 gfx_v9_0_init_golden_registers(adev);
3813
3814         gfx_v9_0_constants_init(adev);
3815
3816         r = gfx_v9_0_csb_vram_pin(adev);
3817         if (r)
3818                 return r;
3819
3820         r = adev->gfx.rlc.funcs->resume(adev);
3821         if (r)
3822                 return r;
3823
3824         r = gfx_v9_0_cp_resume(adev);
3825         if (r)
3826                 return r;
3827
3828         if (adev->asic_type != CHIP_ARCTURUS) {
3829                 r = gfx_v9_0_ngg_en(adev);
3830                 if (r)
3831                         return r;
3832         }
3833
3834         return r;
3835 }
3836
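/* Unmap all user compute queues by submitting one UNMAP_QUEUES (RESET_QUEUES)
 * packet per ring on the KIQ and waiting for the KIQ to process them.
 */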
3837 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3838 {
3839         int r, i;
3840         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3841
3842         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3843         if (r)
3844                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3845
3846         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3847                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3848
3849                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3850                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3851                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3852                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3853                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3854                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3855                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3856                 amdgpu_ring_write(kiq_ring, 0);
3857                 amdgpu_ring_write(kiq_ring, 0);
3858                 amdgpu_ring_write(kiq_ring, 0);
3859         }
3860         r = amdgpu_ring_test_helper(kiq_ring);
3861         if (r)
3862                 DRM_ERROR("KCQ disable failed\n");
3863
3864         return r;
3865 }
3866
3867 static int gfx_v9_0_hw_fini(void *handle)
3868 {
3869         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3870
3871         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3872         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3873         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3874
3875         /* disable KCQ to avoid the CPC touching memory that is no longer valid */
3876         gfx_v9_0_kcq_disable(adev);
3877
3878         if (amdgpu_sriov_vf(adev)) {
3879                 gfx_v9_0_cp_gfx_enable(adev, false);
3880                 /* polling must be disabled for SRIOV once hw_fini completes;
3881                  * otherwise the CPC engine may keep fetching a WB address that
3882                  * is no longer valid after sw_fini and trigger a DMAR read
3883                  * error on the hypervisor side.
3884                  */
3885                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3886                 return 0;
3887         }
3888
3889         /* Use the deinitialize sequence from CAIL when unbinding the device
3890          * from the driver, otherwise the KIQ hangs when it is bound back.
3891          */
3892         if (!adev->in_gpu_reset && !adev->in_suspend) {
3893                 mutex_lock(&adev->srbm_mutex);
3894                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3895                                 adev->gfx.kiq.ring.pipe,
3896                                 adev->gfx.kiq.ring.queue, 0);
3897                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3898                 soc15_grbm_select(adev, 0, 0, 0, 0);
3899                 mutex_unlock(&adev->srbm_mutex);
3900         }
3901
3902         gfx_v9_0_cp_enable(adev, false);
3903         adev->gfx.rlc.funcs->stop(adev);
3904
3905         gfx_v9_0_csb_vram_unpin(adev);
3906
3907         return 0;
3908 }
3909
3910 static int gfx_v9_0_suspend(void *handle)
3911 {
3912         return gfx_v9_0_hw_fini(handle);
3913 }
3914
3915 static int gfx_v9_0_resume(void *handle)
3916 {
3917         return gfx_v9_0_hw_init(handle);
3918 }
3919
3920 static bool gfx_v9_0_is_idle(void *handle)
3921 {
3922         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3923
3924         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3925                                 GRBM_STATUS, GUI_ACTIVE))
3926                 return false;
3927         else
3928                 return true;
3929 }
3930
3931 static int gfx_v9_0_wait_for_idle(void *handle)
3932 {
3933         unsigned i;
3934         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3935
3936         for (i = 0; i < adev->usec_timeout; i++) {
3937                 if (gfx_v9_0_is_idle(handle))
3938                         return 0;
3939                 udelay(1);
3940         }
3941         return -ETIMEDOUT;
3942 }
3943
3944 static int gfx_v9_0_soft_reset(void *handle)
3945 {
3946         u32 grbm_soft_reset = 0;
3947         u32 tmp;
3948         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3949
3950         /* GRBM_STATUS */
3951         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3952         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3953                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3954                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3955                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3956                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3957                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3958                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3959                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3960                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3961                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3962         }
3963
3964         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3965                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3966                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3967         }
3968
3969         /* GRBM_STATUS2 */
3970         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3971         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3972                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3973                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3974
3975
3976         if (grbm_soft_reset) {
3977                 /* stop the rlc */
3978                 adev->gfx.rlc.funcs->stop(adev);
3979
3980                 if (adev->asic_type != CHIP_ARCTURUS)
3981                         /* Disable GFX parsing/prefetching */
3982                         gfx_v9_0_cp_gfx_enable(adev, false);
3983
3984                 /* Disable MEC parsing/prefetching */
3985                 gfx_v9_0_cp_compute_enable(adev, false);
3986
3987                 if (grbm_soft_reset) {
3988                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3989                         tmp |= grbm_soft_reset;
3990                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3991                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3992                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3993
3994                         udelay(50);
3995
3996                         tmp &= ~grbm_soft_reset;
3997                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3998                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3999                 }
4000
4001                 /* Wait a little for things to settle down */
4002                 udelay(50);
4003         }
4004         return 0;
4005 }
4006
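/* Latch and read the 64-bit RLC GPU clock counter; the mutex keeps the
 * LSB/MSB pair consistent.
 */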
4007 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4008 {
4009         uint64_t clock;
4010
4011         mutex_lock(&adev->gfx.gpu_clock_mutex);
4012         WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4013         clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4014                 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4015         mutex_unlock(&adev->gfx.gpu_clock_mutex);
4016         return clock;
4017 }
4018
4019 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4020                                           uint32_t vmid,
4021                                           uint32_t gds_base, uint32_t gds_size,
4022                                           uint32_t gws_base, uint32_t gws_size,
4023                                           uint32_t oa_base, uint32_t oa_size)
4024 {
4025         struct amdgpu_device *adev = ring->adev;
4026
4027         /* GDS Base */
4028         gfx_v9_0_write_data_to_reg(ring, 0, false,
4029                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4030                                    gds_base);
4031
4032         /* GDS Size */
4033         gfx_v9_0_write_data_to_reg(ring, 0, false,
4034                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4035                                    gds_size);
4036
4037         /* GWS */
4038         gfx_v9_0_write_data_to_reg(ring, 0, false,
4039                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4040                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4041
4042         /* OA */
4043         gfx_v9_0_write_data_to_reg(ring, 0, false,
4044                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4045                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
4046 }
4047
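/* Hand-assembled GFX9 shader binaries used by the EDC/GPR workaround below
 * (gfx_v9_0_do_edc_gpr_workarounds) to exercise the VGPR and SGPR files.
 */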
4048 static const u32 vgpr_init_compute_shader[] =
4049 {
4050         0xb07c0000, 0xbe8000ff,
4051         0x000000f8, 0xbf110800,
4052         0x7e000280, 0x7e020280,
4053         0x7e040280, 0x7e060280,
4054         0x7e080280, 0x7e0a0280,
4055         0x7e0c0280, 0x7e0e0280,
4056         0x80808800, 0xbe803200,
4057         0xbf84fff5, 0xbf9c0000,
4058         0xd28c0001, 0x0001007f,
4059         0xd28d0001, 0x0002027e,
4060         0x10020288, 0xb8810904,
4061         0xb7814000, 0xd1196a01,
4062         0x00000301, 0xbe800087,
4063         0xbefc00c1, 0xd89c4000,
4064         0x00020201, 0xd89cc080,
4065         0x00040401, 0x320202ff,
4066         0x00000800, 0x80808100,
4067         0xbf84fff8, 0x7e020280,
4068         0xbf810000, 0x00000000,
4069 };
4070
4071 static const u32 sgpr_init_compute_shader[] =
4072 {
4073         0xb07c0000, 0xbe8000ff,
4074         0x0000005f, 0xbee50080,
4075         0xbe812c65, 0xbe822c65,
4076         0xbe832c65, 0xbe842c65,
4077         0xbe852c65, 0xb77c0005,
4078         0x80808500, 0xbf84fff8,
4079         0xbe800080, 0xbf810000,
4080 };
4081
4082 static const struct soc15_reg_entry vgpr_init_regs[] = {
4083    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4084    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4085    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4086    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4087    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4088    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4089    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4090    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4091    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
4092    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4093 };
4094
4095 static const struct soc15_reg_entry sgpr_init_regs[] = {
4096    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4097    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4098    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4099    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4100    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4101    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4102    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4103    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4104    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
4105    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4106 };
4107
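/* SEC/DED (EDC) error counter registers; the two trailing fields give the
 * number of shader engines and instances to walk when the counters are read
 * back (and thereby cleared) after the workaround dispatches.
 */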
4108 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
4109    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4110    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4111    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4112    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4113    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4114    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4115    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4116    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4117    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4118    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4119    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4120    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4121    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4122    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4123    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4124    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4125    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4126    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4127    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4128    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4129    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4130    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4131    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4132    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4133    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4134    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4135    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4136    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4137    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4138    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4139    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4140    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4141 };
4142
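/* GDS EDC workaround: stream zeroes across the whole GDS with a DMA_DATA
 * packet on the first compute ring so the GDS contents start in a known
 * state, then poll until the CP has consumed the packet.
 */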
4143 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4144 {
4145         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4146         int i, r;
4147
4148         r = amdgpu_ring_alloc(ring, 7);
4149         if (r) {
4150                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4151                         ring->name, r);
4152                 return r;
4153         }
4154
4155         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4156         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4157
4158         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4159         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4160                                 PACKET3_DMA_DATA_DST_SEL(1) |
4161                                 PACKET3_DMA_DATA_SRC_SEL(2) |
4162                                 PACKET3_DMA_DATA_ENGINE(0)));
4163         amdgpu_ring_write(ring, 0);
4164         amdgpu_ring_write(ring, 0);
4165         amdgpu_ring_write(ring, 0);
4166         amdgpu_ring_write(ring, 0);
4167         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4168                                 adev->gds.gds_size);
4169
4170         amdgpu_ring_commit(ring);
4171
4172         for (i = 0; i < adev->usec_timeout; i++) {
4173                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4174                         break;
4175                 udelay(1);
4176         }
4177
4178         if (i >= adev->usec_timeout)
4179                 r = -ETIMEDOUT;
4180
4181         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4182
4183         return r;
4184 }
4185
4186 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4187 {
4188         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4189         struct amdgpu_ib ib;
4190         struct dma_fence *f = NULL;
4191         int r, i, j, k;
4192         unsigned total_size, vgpr_offset, sgpr_offset;
4193         u64 gpu_addr;
4194
4195         /* only supported when RAS is enabled */
4196         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4197                 return 0;
4198
4199         /* bail if the compute ring is not ready */
4200         if (!ring->sched.ready)
4201                 return 0;
4202
4203         total_size =
4204                 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4205         total_size +=
4206                 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4207         total_size = ALIGN(total_size, 256);
4208         vgpr_offset = total_size;
4209         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4210         sgpr_offset = total_size;
4211         total_size += sizeof(sgpr_init_compute_shader);
4212
4213         /* allocate an indirect buffer to put the commands in */
4214         memset(&ib, 0, sizeof(ib));
4215         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4216         if (r) {
4217                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4218                 return r;
4219         }
4220
4221         /* load the compute shaders */
4222         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4223                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4224
4225         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4226                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4227
4228         /* init the ib length to 0 */
4229         ib.length_dw = 0;
4230
4231         /* VGPR */
4232         /* write the register state for the compute dispatch */
4233         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
4234                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4235                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4236                                                                 - PACKET3_SET_SH_REG_START;
4237                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4238         }
4239         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4240         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4241         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4242         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4243                                                         - PACKET3_SET_SH_REG_START;
4244         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4245         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4246
4247         /* write dispatch packet */
4248         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4249         ib.ptr[ib.length_dw++] = 128; /* x */
4250         ib.ptr[ib.length_dw++] = 1; /* y */
4251         ib.ptr[ib.length_dw++] = 1; /* z */
4252         ib.ptr[ib.length_dw++] =
4253                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4254
4255         /* write CS partial flush packet */
4256         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4257         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4258
4259         /* SGPR */
4260         /* write the register state for the compute dispatch */
4261         for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
4262                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4263                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
4264                                                                 - PACKET3_SET_SH_REG_START;
4265                 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
4266         }
4267         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4268         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4269         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4270         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4271                                                         - PACKET3_SET_SH_REG_START;
4272         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4273         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4274
4275         /* write dispatch packet */
4276         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4277         ib.ptr[ib.length_dw++] = 128; /* x */
4278         ib.ptr[ib.length_dw++] = 1; /* y */
4279         ib.ptr[ib.length_dw++] = 1; /* z */
4280         ib.ptr[ib.length_dw++] =
4281                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4282
4283         /* write CS partial flush packet */
4284         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4285         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4286
4287         /* schedule the ib on the ring */
4288         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4289         if (r) {
4290                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4291                 goto fail;
4292         }
4293
4294         /* wait for the GPU to finish processing the IB */
4295         r = dma_fence_wait(f, false);
4296         if (r) {
4297                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4298                 goto fail;
4299         }
4300
4301         /* read back registers to clear the counters */
4302         mutex_lock(&adev->grbm_idx_mutex);
4303         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
4304                 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
4305                         for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
4306                                 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
4307                                 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
4308                         }
4309                 }
4310         }
4311         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
4312         mutex_unlock(&adev->grbm_idx_mutex);
4313
4314 fail:
4315         amdgpu_ib_free(adev, &ib, NULL);
4316         dma_fence_put(f);
4317
4318         return r;
4319 }
4320
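/* early_init: choose the ring counts for this ASIC (Arcturus has no gfx
 * ring) and hook up the ring, irq, GDS and RLC function tables.
 */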
4321 static int gfx_v9_0_early_init(void *handle)
4322 {
4323         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4324
4325         if (adev->asic_type == CHIP_ARCTURUS)
4326                 adev->gfx.num_gfx_rings = 0;
4327         else
4328                 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4329         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4330         gfx_v9_0_set_ring_funcs(adev);
4331         gfx_v9_0_set_irq_funcs(adev);
4332         gfx_v9_0_set_gds_init(adev);
4333         gfx_v9_0_set_rlc_funcs(adev);
4334
4335         return 0;
4336 }
4337
4338 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
4339                 struct ras_err_data *err_data,
4340                 struct amdgpu_iv_entry *entry);
4341
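/* Enable GFX RAS: run the EDC workarounds, (re)enable the RAS feature for
 * the GFX block, register the RAS interrupt handler and debugfs/sysfs nodes,
 * and arm the CP ECC error interrupt.
 */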
4342 static int gfx_v9_0_ecc_late_init(void *handle)
4343 {
4344         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4345         struct ras_common_if **ras_if = &adev->gfx.ras_if;
4346         struct ras_ih_if ih_info = {
4347                 .cb = gfx_v9_0_process_ras_data_cb,
4348         };
4349         struct ras_fs_if fs_info = {
4350                 .sysfs_name = "gfx_err_count",
4351                 .debugfs_name = "gfx_err_inject",
4352         };
4353         struct ras_common_if ras_block = {
4354                 .block = AMDGPU_RAS_BLOCK__GFX,
4355                 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
4356                 .sub_block_index = 0,
4357                 .name = "gfx",
4358         };
4359         int r;
4360
4361         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
4362                 amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
4363                 return 0;
4364         }
4365
4366         r = gfx_v9_0_do_edc_gds_workarounds(adev);
4367         if (r)
4368                 return r;
4369
4370         /* requires IBs so do in late init after IB pool is initialized */
4371         r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4372         if (r)
4373                 return r;
4374
4375         /* handle resume path. */
4376         if (*ras_if) {
4377                 /* resend ras TA enable cmd during resume.
4378                  * prepare to handle failure.
4379                  */
4380                 ih_info.head = **ras_if;
4381                 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4382                 if (r) {
4383                         if (r == -EAGAIN) {
4384                                 /* request a gpu reset. will run again. */
4385                                 amdgpu_ras_request_reset_on_boot(adev,
4386                                                 AMDGPU_RAS_BLOCK__GFX);
4387                                 return 0;
4388                         }
4389                         /* fail to enable ras, cleanup all. */
4390                         goto irq;
4391                 }
4392                 /* enable successfully. continue. */
4393                 goto resume;
4394         }
4395
4396         *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
4397         if (!*ras_if)
4398                 return -ENOMEM;
4399
4400         **ras_if = ras_block;
4401
4402         r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4403         if (r) {
4404                 if (r == -EAGAIN) {
4405                         amdgpu_ras_request_reset_on_boot(adev,
4406                                         AMDGPU_RAS_BLOCK__GFX);
4407                         r = 0;
4408                 }
4409                 goto feature;
4410         }
4411
4412         ih_info.head = **ras_if;
4413         fs_info.head = **ras_if;
4414
4415         r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
4416         if (r)
4417                 goto interrupt;
4418
4419         amdgpu_ras_debugfs_create(adev, &fs_info);
4420
4421         r = amdgpu_ras_sysfs_create(adev, &fs_info);
4422         if (r)
4423                 goto sysfs;
4424 resume:
4425         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
4426         if (r)
4427                 goto irq;
4428
4429         return 0;
4430 irq:
4431         amdgpu_ras_sysfs_remove(adev, *ras_if);
4432 sysfs:
4433         amdgpu_ras_debugfs_remove(adev, *ras_if);
4434         amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
4435 interrupt:
4436         amdgpu_ras_feature_enable(adev, *ras_if, 0);
4437 feature:
4438         kfree(*ras_if);
4439         *ras_if = NULL;
4440         return r;
4441 }
4442
4443 static int gfx_v9_0_late_init(void *handle)
4444 {
4445         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4446         int r;
4447
4448         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4449         if (r)
4450                 return r;
4451
4452         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4453         if (r)
4454                 return r;
4455
4456         r = gfx_v9_0_ecc_late_init(handle);
4457         if (r)
4458                 return r;
4459
4460         return 0;
4461 }
4462
4463 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4464 {
4465         uint32_t rlc_setting;
4466
4467         /* if RLC is not enabled, do nothing */
4468         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4469         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4470                 return false;
4471
4472         return true;
4473 }
4474
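/* Ask the RLC to enter safe mode by writing the CMD/MESSAGE handshake and
 * waiting until the RLC acknowledges (CMD field cleared).
 */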
4475 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4476 {
4477         uint32_t data;
4478         unsigned i;
4479
4480         data = RLC_SAFE_MODE__CMD_MASK;
4481         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4482         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4483
4484         /* wait for RLC_SAFE_MODE */
4485         for (i = 0; i < adev->usec_timeout; i++) {
4486                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4487                         break;
4488                 udelay(1);
4489         }
4490 }
4491
4492 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4493 {
4494         uint32_t data;
4495
4496         data = RLC_SAFE_MODE__CMD_MASK;
4497         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4498 }
4499
4500 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4501                                                 bool enable)
4502 {
4503         amdgpu_gfx_rlc_enter_safe_mode(adev);
4504
4505         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4506                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4507                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4508                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4509         } else {
4510                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4511                 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4512         }
4513
4514         amdgpu_gfx_rlc_exit_safe_mode(adev);
4515 }
4516
4517 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4518                                                 bool enable)
4519 {
4520         /* TODO: double check if we need to perform under safe mode */
4521         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4522
4523         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4524                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4525         else
4526                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4527
4528         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4529                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4530         else
4531                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4532
4533         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4534 }
4535
4536 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4537                                                       bool enable)
4538 {
4539         uint32_t data, def;
4540
4541         amdgpu_gfx_rlc_enter_safe_mode(adev);
4542
4543         /* It is disabled by HW by default */
4544         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4545                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4546                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4547
4548                 if (adev->asic_type != CHIP_VEGA12)
4549                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4550
4551                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4552                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4553                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4554
4555                 /* only for Vega10 & Raven1 */
4556                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4557
4558                 if (def != data)
4559                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4560
4561                 /* MGLS is a global flag to control all MGLS in GFX */
4562                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4563                         /* 2 - RLC memory Light sleep */
4564                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4565                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4566                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4567                                 if (def != data)
4568                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4569                         }
4570                         /* 3 - CP memory Light sleep */
4571                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4572                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4573                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4574                                 if (def != data)
4575                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4576                         }
4577                 }
4578         } else {
4579                 /* 1 - MGCG_OVERRIDE */
4580                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4581
4582                 if (adev->asic_type != CHIP_VEGA12)
4583                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4584
4585                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4586                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4587                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4588                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4589
4590                 if (def != data)
4591                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4592
4593                 /* 2 - disable MGLS in RLC */
4594                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4595                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4596                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4597                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4598                 }
4599
4600                 /* 3 - disable MGLS in CP */
4601                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4602                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4603                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4604                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4605                 }
4606         }
4607
4608         amdgpu_gfx_rlc_exit_safe_mode(adev);
4609 }
4610
4611 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4612                                            bool enable)
4613 {
4614         uint32_t data, def;
4615
4616         amdgpu_gfx_rlc_enter_safe_mode(adev);
4617
4618         /* Enable 3D CGCG/CGLS */
4619         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4620                 /* write cmd to clear cgcg/cgls ov */
4621                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4622                 /* unset CGCG override */
4623                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4624                 /* update CGCG and CGLS override bits */
4625                 if (def != data)
4626                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4627
4628                 /* enable 3Dcgcg FSM(0x0000363f) */
4629                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4630
4631                 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4632                         RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4633                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4634                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4635                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4636                 if (def != data)
4637                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4638
4639                 /* set IDLE_POLL_COUNT(0x00900100) */
4640                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4641                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4642                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4643                 if (def != data)
4644                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4645         } else {
4646                 /* Disable CGCG/CGLS */
4647                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4648                 /* disable cgcg, cgls should be disabled */
4649                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4650                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4651                 /* disable cgcg and cgls in FSM */
4652                 if (def != data)
4653                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4654         }
4655
4656         amdgpu_gfx_rlc_exit_safe_mode(adev);
4657 }
4658
4659 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4660                                                       bool enable)
4661 {
4662         uint32_t def, data;
4663
4664         amdgpu_gfx_rlc_enter_safe_mode(adev);
4665
4666         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4667                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4668                 /* unset CGCG override */
4669                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4670                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4671                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4672                 else
4673                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4674                 /* update CGCG and CGLS override bits */
4675                 if (def != data)
4676                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4677
4678                 /* enable cgcg FSM(0x0000363F) */
4679                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4680
4681                 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4682                         RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4683                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4684                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4685                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4686                 if (def != data)
4687                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4688
4689                 /* set IDLE_POLL_COUNT(0x00900100) */
4690                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4691                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4692                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4693                 if (def != data)
4694                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4695         } else {
4696                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4697                 /* reset CGCG/CGLS bits */
4698                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4699                 /* disable cgcg and cgls in FSM */
4700                 if (def != data)
4701                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4702         }
4703
4704         amdgpu_gfx_rlc_exit_safe_mode(adev);
4705 }
4706
4707 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4708                                             bool enable)
4709 {
4710         if (enable) {
4711                 /* CGCG/CGLS should be enabled after MGCG/MGLS
4712                  * ===  MGCG + MGLS ===
4713                  */
4714                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4715                 /* ===  CGCG /CGLS for GFX 3D Only === */
4716                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4717                 /* ===  CGCG + CGLS === */
4718                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4719         } else {
4720                 /* CGCG/CGLS should be disabled before MGCG/MGLS
4721                  * ===  CGCG + CGLS ===
4722                  */
4723                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4724                 /* ===  CGCG /CGLS for GFX 3D Only === */
4725                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4726                 /* ===  MGCG + MGLS === */
4727                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4728         }
4729         return 0;
4730 }
4731
4732 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4733         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4734         .set_safe_mode = gfx_v9_0_set_safe_mode,
4735         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
4736         .init = gfx_v9_0_rlc_init,
4737         .get_csb_size = gfx_v9_0_get_csb_size,
4738         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4739         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4740         .resume = gfx_v9_0_rlc_resume,
4741         .stop = gfx_v9_0_rlc_stop,
4742         .reset = gfx_v9_0_rlc_reset,
4743         .start = gfx_v9_0_rlc_start
4744 };
4745
4746 static int gfx_v9_0_set_powergating_state(void *handle,
4747                                           enum amd_powergating_state state)
4748 {
4749         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4750         bool enable = (state == AMD_PG_STATE_GATE);
4751
4752         switch (adev->asic_type) {
4753         case CHIP_RAVEN:
4754                 if (!enable) {
4755                         amdgpu_gfx_off_ctrl(adev, false);
4756                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4757                 }
4758                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4759                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4760                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4761                 } else {
4762                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4763                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4764                 }
4765
4766                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4767                         gfx_v9_0_enable_cp_power_gating(adev, true);
4768                 else
4769                         gfx_v9_0_enable_cp_power_gating(adev, false);
4770
4771                 /* update gfx cgpg state */
4772                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4773
4774                 /* update mgcg state */
4775                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4776
4777                 if (enable)
4778                         amdgpu_gfx_off_ctrl(adev, true);
4779                 break;
4780         case CHIP_VEGA12:
4781                 if (!enable) {
4782                         amdgpu_gfx_off_ctrl(adev, false);
4783                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4784                 } else {
4785                         amdgpu_gfx_off_ctrl(adev, true);
4786                 }
4787                 break;
4788         default:
4789                 break;
4790         }
4791
4792         return 0;
4793 }
4794
4795 static int gfx_v9_0_set_clockgating_state(void *handle,
4796                                           enum amd_clockgating_state state)
4797 {
4798         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4799
4800         if (amdgpu_sriov_vf(adev))
4801                 return 0;
4802
4803         switch (adev->asic_type) {
4804         case CHIP_VEGA10:
4805         case CHIP_VEGA12:
4806         case CHIP_VEGA20:
4807         case CHIP_RAVEN:
4808                 gfx_v9_0_update_gfx_clock_gating(adev,
4809                                                  state == AMD_CG_STATE_GATE);
4810                 break;
4811         default:
4812                 break;
4813         }
4814         return 0;
4815 }
4816
4817 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4818 {
4819         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4820         int data;
4821
4822         if (amdgpu_sriov_vf(adev))
4823                 *flags = 0;
4824
4825         /* AMD_CG_SUPPORT_GFX_MGCG */
4826         data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4827         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4828                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4829
4830         /* AMD_CG_SUPPORT_GFX_CGCG */
4831         data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4832         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4833                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4834
4835         /* AMD_CG_SUPPORT_GFX_CGLS */
4836         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4837                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4838
4839         /* AMD_CG_SUPPORT_GFX_RLC_LS */
4840         data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4841         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4842                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4843
4844         /* AMD_CG_SUPPORT_GFX_CP_LS */
4845         data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4846         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4847                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4848
4849         if (adev->asic_type != CHIP_ARCTURUS) {
4850                 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4851                 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4852                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4853                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4854
4855                 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4856                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4857                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4858         }
4859 }
4860
4861 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4862 {
4863         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4864 }
4865
4866 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4867 {
4868         struct amdgpu_device *adev = ring->adev;
4869         u64 wptr;
4870
4871         /* XXX check if swapping is necessary on BE */
4872         if (ring->use_doorbell) {
4873                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4874         } else {
4875                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4876                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4877         }
4878
4879         return wptr;
4880 }
4881
4882 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4883 {
4884         struct amdgpu_device *adev = ring->adev;
4885
4886         if (ring->use_doorbell) {
4887                 /* XXX check if swapping is necessary on BE */
4888                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4889                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4890         } else {
4891                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4892                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4893         }
4894 }
4895
4896 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4897 {
4898         struct amdgpu_device *adev = ring->adev;
4899         u32 ref_and_mask, reg_mem_engine;
4900         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4901
4902         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4903                 switch (ring->me) {
4904                 case 1:
4905                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4906                         break;
4907                 case 2:
4908                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4909                         break;
4910                 default:
4911                         return;
4912                 }
4913                 reg_mem_engine = 0;
4914         } else {
4915                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4916                 reg_mem_engine = 1; /* pfp */
4917         }
4918
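             /* WAIT_REG_MEM in write-then-wait mode: write ref_and_mask to the
              * NBIO HDP flush request register, then poll the flush done
              * register until the matching bit is set (poll interval 0x20)
              */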
4919         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4920                               adev->nbio_funcs->get_hdp_flush_req_offset(adev),
4921                               adev->nbio_funcs->get_hdp_flush_done_offset(adev),
4922                               ref_and_mask, ref_and_mask, 0x20);
4923 }
4924
4925 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4926                                         struct amdgpu_job *job,
4927                                         struct amdgpu_ib *ib,
4928                                         uint32_t flags)
4929 {
4930         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4931         u32 header, control = 0;
4932
4933         if (ib->flags & AMDGPU_IB_FLAG_CE)
4934                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4935         else
4936                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4937
4938         control |= ib->length_dw | (vmid << 24);
4939
4940         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4941                 control |= INDIRECT_BUFFER_PRE_ENB(1);
4942
4943                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4944                         gfx_v9_0_ring_emit_de_meta(ring);
4945         }
4946
4947         amdgpu_ring_write(ring, header);
4948         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4949         amdgpu_ring_write(ring,
4950 #ifdef __BIG_ENDIAN
4951                 (2 << 0) |
4952 #endif
4953                 lower_32_bits(ib->gpu_addr));
4954         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4955         amdgpu_ring_write(ring, control);
4956 }
4957
4958 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4959                                           struct amdgpu_job *job,
4960                                           struct amdgpu_ib *ib,
4961                                           uint32_t flags)
4962 {
4963         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4964         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4965
4966         /* Currently, there is a high probability of a wave ID mismatch
4967          * between ME and GDS, leading to a hw deadlock, because ME generates
4968          * different wave IDs than the GDS expects. This situation happens
4969          * randomly when at least 5 compute pipes use GDS ordered append.
4970          * The wave IDs generated by ME are also wrong after suspend/resume.
4971          * Those are probably bugs somewhere else in the kernel driver.
4972          *
4973          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4974          * GDS to 0 for this ring (me/pipe).
4975          */
4976         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4977                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4978                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4979                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4980         }
4981
4982         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4983         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4984         amdgpu_ring_write(ring,
4985 #ifdef __BIG_ENDIAN
4986                                 (2 << 0) |
4987 #endif
4988                                 lower_32_bits(ib->gpu_addr));
4989         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4990         amdgpu_ring_write(ring, control);
4991 }
4992
4993 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4994                                      u64 seq, unsigned flags)
4995 {
4996         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4997         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4998         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4999
5000         /* RELEASE_MEM - flush caches, send int */
5001         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5002         amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5003                                                EOP_TC_NC_ACTION_EN) :
5004                                               (EOP_TCL1_ACTION_EN |
5005                                                EOP_TC_ACTION_EN |
5006                                                EOP_TC_WB_ACTION_EN |
5007                                                EOP_TC_MD_ACTION_EN)) |
5008                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5009                                  EVENT_INDEX(5)));
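             /* DATA_SEL: 1 = write the 32-bit seq, 2 = write the full 64-bit seq;
              * INT_SEL: 2 = also raise an interrupt, 0 = no interrupt
              */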
5010         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5011
5012         /*
5013          * the address must be Qword aligned for a 64bit write, and Dword
5014          * aligned if only the low 32 bits are sent (data high is discarded)
5015          */
5016         if (write64bit)
5017                 BUG_ON(addr & 0x7);
5018         else
5019                 BUG_ON(addr & 0x3);
5020         amdgpu_ring_write(ring, lower_32_bits(addr));
5021         amdgpu_ring_write(ring, upper_32_bits(addr));
5022         amdgpu_ring_write(ring, lower_32_bits(seq));
5023         amdgpu_ring_write(ring, upper_32_bits(seq));
5024         amdgpu_ring_write(ring, 0);
5025 }
5026
5027 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5028 {
5029         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5030         uint32_t seq = ring->fence_drv.sync_seq;
5031         uint64_t addr = ring->fence_drv.gpu_addr;
5032
5033         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5034                               lower_32_bits(addr), upper_32_bits(addr),
5035                               seq, 0xffffffff, 4);
5036 }
5037
5038 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5039                                         unsigned vmid, uint64_t pd_addr)
5040 {
5041         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5042
5043         /* compute doesn't have PFP */
5044         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5045                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5046                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5047                 amdgpu_ring_write(ring, 0x0);
5048         }
5049 }
5050
5051 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5052 {
5053         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5054 }
5055
5056 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5057 {
5058         u64 wptr;
5059
5060         /* XXX check if swapping is necessary on BE */
5061         if (ring->use_doorbell)
5062                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5063         else
5064                 BUG();
5065         return wptr;
5066 }
5067
5068 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
5069                                            bool acquire)
5070 {
5071         struct amdgpu_device *adev = ring->adev;
5072         int pipe_num, tmp, reg;
5073         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
5074
5075         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
5076
5077         /* first me only has 2 entries, GFX and HP3D */
5078         if (ring->me > 0)
5079                 pipe_num -= 2;
5080
5081         reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
5082         tmp = RREG32(reg);
5083         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
5084         WREG32(reg, tmp);
5085 }
5086
5087 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
5088                                             struct amdgpu_ring *ring,
5089                                             bool acquire)
5090 {
5091         int i, pipe;
5092         bool reserve;
5093         struct amdgpu_ring *iring;
5094
5095         mutex_lock(&adev->gfx.pipe_reserve_mutex);
5096         pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
5097         if (acquire)
5098                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5099         else
5100                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5101
5102         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
5103                 /* Clear all reservations - everyone reacquires all resources */
5104                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
5105                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
5106                                                        true);
5107
5108                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
5109                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
5110                                                        true);
5111         } else {
5112                 /* Lower all pipes without a current reservation */
5113                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
5114                         iring = &adev->gfx.gfx_ring[i];
5115                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5116                                                            iring->me,
5117                                                            iring->pipe,
5118                                                            0);
5119                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5120                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5121                 }
5122
5123                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
5124                         iring = &adev->gfx.compute_ring[i];
5125                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5126                                                            iring->me,
5127                                                            iring->pipe,
5128                                                            0);
5129                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5130                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5131                 }
5132         }
5133
5134         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
5135 }
5136
5137 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
5138                                       struct amdgpu_ring *ring,
5139                                       bool acquire)
5140 {
5141         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
5142         uint32_t queue_priority = acquire ? 0xf : 0x0;
5143
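             /* select this ring's HQD via GRBM before programming its
              * per-pipe/per-queue priority registers
              */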
5144         mutex_lock(&adev->srbm_mutex);
5145         soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5146
5147         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
5148         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
5149
5150         soc15_grbm_select(adev, 0, 0, 0, 0);
5151         mutex_unlock(&adev->srbm_mutex);
5152 }
5153
5154 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
5155                                                enum drm_sched_priority priority)
5156 {
5157         struct amdgpu_device *adev = ring->adev;
5158         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
5159
5160         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
5161                 return;
5162
5163         gfx_v9_0_hqd_set_priority(adev, ring, acquire);
5164         gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
5165 }
5166
5167 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5168 {
5169         struct amdgpu_device *adev = ring->adev;
5170
5171         /* XXX check if swapping is necessary on BE */
5172         if (ring->use_doorbell) {
5173                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5174                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5175         } else {
5176                 BUG(); /* only DOORBELL method supported on gfx9 now */
5177         }
5178 }
5179
5180 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5181                                          u64 seq, unsigned int flags)
5182 {
5183         struct amdgpu_device *adev = ring->adev;
5184
5185         /* we only allocate 32bit for each seq wb address */
5186         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5187
5188         /* write fence seq to the "addr" */
5189         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5190         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5191                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5192         amdgpu_ring_write(ring, lower_32_bits(addr));
5193         amdgpu_ring_write(ring, upper_32_bits(addr));
5194         amdgpu_ring_write(ring, lower_32_bits(seq));
5195
5196         if (flags & AMDGPU_FENCE_FLAG_INT) {
5197                 /* set register to trigger INT */
5198                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5199                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5200                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5201                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5202                 amdgpu_ring_write(ring, 0);
5203                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5204         }
5205 }
5206
5207 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5208 {
5209         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5210         amdgpu_ring_write(ring, 0);
5211 }
5212
5213 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5214 {
5215         struct v9_ce_ib_state ce_payload = {0};
5216         uint64_t csa_addr;
5217         int cnt;
5218
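             /* WRITE_DATA count: control dword + two address dwords + payload
              * dwords, minus one per the PACKET3 count convention
              */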
5219         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5220         csa_addr = amdgpu_csa_vaddr(ring->adev);
5221
5222         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5223         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5224                                  WRITE_DATA_DST_SEL(8) |
5225                                  WR_CONFIRM) |
5226                                  WRITE_DATA_CACHE_POLICY(0));
5227         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5228         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5229         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5230 }
5231
5232 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5233 {
5234         struct v9_de_ib_state de_payload = {0};
5235         uint64_t csa_addr, gds_addr;
5236         int cnt;
5237
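             /* the GDS backup area referenced by the DE metadata lives one
              * 4 KiB page after the CSA
              */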
5238         csa_addr = amdgpu_csa_vaddr(ring->adev);
5239         gds_addr = csa_addr + 4096;
5240         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5241         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5242
5243         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5244         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5245         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5246                                  WRITE_DATA_DST_SEL(8) |
5247                                  WR_CONFIRM) |
5248                                  WRITE_DATA_CACHE_POLICY(0));
5249         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5250         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5251         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5252 }
5253
5254 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5255 {
5256         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5257         amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* 0: frame_begin, 1: frame_end */
5258 }
5259
5260 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5261 {
5262         uint32_t dw2 = 0;
5263
5264         if (amdgpu_sriov_vf(ring->adev))
5265                 gfx_v9_0_ring_emit_ce_meta(ring);
5266
5267         gfx_v9_0_ring_emit_tmz(ring, true);
5268
5269         dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5270         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5271                 /* set load_global_config & load_global_uconfig */
5272                 dw2 |= 0x8001;
5273                 /* set load_cs_sh_regs */
5274                 dw2 |= 0x01000000;
5275                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5276                 dw2 |= 0x10002;
5277
5278                 /* set load_ce_ram if a preamble is presented */
5279                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5280                         dw2 |= 0x10000000;
5281         } else {
5282                 /* still load_ce_ram if this is the first time a preamble is
5283                  * presented, even though no context switch happens.
5284                  */
5285                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5286                         dw2 |= 0x10000000;
5287         }
5288
5289         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5290         amdgpu_ring_write(ring, dw2);
5291         amdgpu_ring_write(ring, 0);
5292 }
5293
5294 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5295 {
5296         unsigned ret;
5297         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5298         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5299         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5300         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
5301         ret = ring->wptr & ring->buf_mask;
5302         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5303         return ret;
5304 }
5305
5306 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5307 {
5308         unsigned cur;
5309         BUG_ON(offset > ring->buf_mask);
5310         BUG_ON(ring->ring[offset] != 0x55aa55aa);
5311
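             /* patch the placeholder with the number of dwords between it and
              * the current write pointer so the CP can skip the conditional
              * block, taking ring buffer wrap-around into account
              */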
5312         cur = (ring->wptr & ring->buf_mask) - 1;
5313         if (likely(cur > offset))
5314                 ring->ring[offset] = cur - offset;
5315         else
5316                 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5317 }
5318
5319 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5320 {
5321         struct amdgpu_device *adev = ring->adev;
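             /* COPY_DATA the register into the writeback buffer entry at
              * adev->virt.reg_val_offs so the caller can read the value back
              * from adev->wb
              */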
5322
5323         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5324         amdgpu_ring_write(ring, 0 |     /* src: register */
5325                                 (5 << 8) |      /* dst: memory */
5326                                 (1 << 20));     /* write confirm */
5327         amdgpu_ring_write(ring, reg);
5328         amdgpu_ring_write(ring, 0);
5329         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5330                                 adev->virt.reg_val_offs * 4));
5331         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5332                                 adev->virt.reg_val_offs * 4));
5333 }
5334
5335 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5336                                     uint32_t val)
5337 {
5338         uint32_t cmd = 0;
5339
5340         switch (ring->funcs->type) {
5341         case AMDGPU_RING_TYPE_GFX:
5342                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5343                 break;
5344         case AMDGPU_RING_TYPE_KIQ:
5345                 cmd = (1 << 16); /* no inc addr */
5346                 break;
5347         default:
5348                 cmd = WR_CONFIRM;
5349                 break;
5350         }
5351         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5352         amdgpu_ring_write(ring, cmd);
5353         amdgpu_ring_write(ring, reg);
5354         amdgpu_ring_write(ring, 0);
5355         amdgpu_ring_write(ring, val);
5356 }
5357
5358 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5359                                         uint32_t val, uint32_t mask)
5360 {
5361         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5362 }
5363
5364 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5365                                                   uint32_t reg0, uint32_t reg1,
5366                                                   uint32_t ref, uint32_t mask)
5367 {
5368         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5369         struct amdgpu_device *adev = ring->adev;
5370         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5371                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5372
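             /* ME/MEC firmware new enough to support it can do the register
              * write and the wait in a single WAIT_REG_MEM packet; older
              * firmware falls back to separate write and wait packets
              */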
5373         if (fw_version_ok)
5374                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5375                                       ref, mask, 0x20);
5376         else
5377                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5378                                                            ref, mask);
5379 }
5380
5381 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5382 {
5383         struct amdgpu_device *adev = ring->adev;
5384         uint32_t value = 0;
5385
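             /* build an SQ_CMD that only targets waves of the given VMID
              * (CHECK_VMID = 1), so soft recovery only affects the hung job's
              * waves instead of requiring a full GPU reset
              */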
5386         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5387         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5388         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5389         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5390         WREG32(mmSQ_CMD, value);
5391 }
5392
5393 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5394                                                  enum amdgpu_interrupt_state state)
5395 {
5396         switch (state) {
5397         case AMDGPU_IRQ_STATE_DISABLE:
5398         case AMDGPU_IRQ_STATE_ENABLE:
5399                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5400                                TIME_STAMP_INT_ENABLE,
5401                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5402                 break;
5403         default:
5404                 break;
5405         }
5406 }
5407
5408 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5409                                                      int me, int pipe,
5410                                                      enum amdgpu_interrupt_state state)
5411 {
5412         u32 mec_int_cntl, mec_int_cntl_reg;
5413
5414         /*
5415          * amdgpu controls only the first MEC. That's why this function only
5416          * handles the setting of interrupts for this specific MEC. All other
5417          * pipes' interrupts are set by amdkfd.
5418          */
5419
5420         if (me == 1) {
5421                 switch (pipe) {
5422                 case 0:
5423                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5424                         break;
5425                 case 1:
5426                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5427                         break;
5428                 case 2:
5429                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5430                         break;
5431                 case 3:
5432                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5433                         break;
5434                 default:
5435                         DRM_DEBUG("invalid pipe %d\n", pipe);
5436                         return;
5437                 }
5438         } else {
5439                 DRM_DEBUG("invalid me %d\n", me);
5440                 return;
5441         }
5442
5443         switch (state) {
5444         case AMDGPU_IRQ_STATE_DISABLE:
5445                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5446                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5447                                              TIME_STAMP_INT_ENABLE, 0);
5448                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5449                 break;
5450         case AMDGPU_IRQ_STATE_ENABLE:
5451                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5452                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5453                                              TIME_STAMP_INT_ENABLE, 1);
5454                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5455                 break;
5456         default:
5457                 break;
5458         }
5459 }
5460
5461 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5462                                              struct amdgpu_irq_src *source,
5463                                              unsigned type,
5464                                              enum amdgpu_interrupt_state state)
5465 {
5466         switch (state) {
5467         case AMDGPU_IRQ_STATE_DISABLE:
5468         case AMDGPU_IRQ_STATE_ENABLE:
5469                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5470                                PRIV_REG_INT_ENABLE,
5471                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5472                 break;
5473         default:
5474                 break;
5475         }
5476
5477         return 0;
5478 }
5479
5480 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5481                                               struct amdgpu_irq_src *source,
5482                                               unsigned type,
5483                                               enum amdgpu_interrupt_state state)
5484 {
5485         switch (state) {
5486         case AMDGPU_IRQ_STATE_DISABLE:
5487         case AMDGPU_IRQ_STATE_ENABLE:
5488                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5489                                PRIV_INSTR_INT_ENABLE,
5490                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
                     break;
5491         default:
5492                 break;
5493         }
5494
5495         return 0;
5496 }
5497
5498 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
5499         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5500                         CP_ECC_ERROR_INT_ENABLE, 1)
5501
5502 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
5503         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5504                         CP_ECC_ERROR_INT_ENABLE, 0)
5505
5506 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5507                                               struct amdgpu_irq_src *source,
5508                                               unsigned type,
5509                                               enum amdgpu_interrupt_state state)
5510 {
5511         switch (state) {
5512         case AMDGPU_IRQ_STATE_DISABLE:
5513                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5514                                 CP_ECC_ERROR_INT_ENABLE, 0);
5515                 DISABLE_ECC_ON_ME_PIPE(1, 0);
5516                 DISABLE_ECC_ON_ME_PIPE(1, 1);
5517                 DISABLE_ECC_ON_ME_PIPE(1, 2);
5518                 DISABLE_ECC_ON_ME_PIPE(1, 3);
5519                 break;
5520
5521         case AMDGPU_IRQ_STATE_ENABLE:
5522                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5523                                 CP_ECC_ERROR_INT_ENABLE, 1);
5524                 ENABLE_ECC_ON_ME_PIPE(1, 0);
5525                 ENABLE_ECC_ON_ME_PIPE(1, 1);
5526                 ENABLE_ECC_ON_ME_PIPE(1, 2);
5527                 ENABLE_ECC_ON_ME_PIPE(1, 3);
5528                 break;
5529         default:
5530                 break;
5531         }
5532
5533         return 0;
5534 }
5535
5536
5537 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5538                                             struct amdgpu_irq_src *src,
5539                                             unsigned type,
5540                                             enum amdgpu_interrupt_state state)
5541 {
5542         switch (type) {
5543         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5544                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5545                 break;
5546         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5547                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5548                 break;
5549         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5550                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5551                 break;
5552         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5553                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5554                 break;
5555         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5556                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5557                 break;
5558         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5559                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5560                 break;
5561         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5562                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5563                 break;
5564         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5565                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5566                 break;
5567         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5568                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5569                 break;
5570         default:
5571                 break;
5572         }
5573         return 0;
5574 }
5575
5576 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5577                             struct amdgpu_irq_src *source,
5578                             struct amdgpu_iv_entry *entry)
5579 {
5580         int i;
5581         u8 me_id, pipe_id, queue_id;
5582         struct amdgpu_ring *ring;
5583
5584         DRM_DEBUG("IH: CP EOP\n");
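             /* ring_id encodes the source queue: bits [3:2] = ME,
              * bits [1:0] = pipe, bits [6:4] = queue
              */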
5585         me_id = (entry->ring_id & 0x0c) >> 2;
5586         pipe_id = (entry->ring_id & 0x03) >> 0;
5587         queue_id = (entry->ring_id & 0x70) >> 4;
5588
5589         switch (me_id) {
5590         case 0:
5591                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5592                 break;
5593         case 1:
5594         case 2:
5595                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5596                         ring = &adev->gfx.compute_ring[i];
5597                         /* Per-queue interrupt is supported for MEC starting from VI.
5598                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
5599                          */
5600                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5601                                 amdgpu_fence_process(ring);
5602                 }
5603                 break;
5604         }
5605         return 0;
5606 }
5607
5608 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5609                            struct amdgpu_iv_entry *entry)
5610 {
5611         u8 me_id, pipe_id, queue_id;
5612         struct amdgpu_ring *ring;
5613         int i;
5614
5615         me_id = (entry->ring_id & 0x0c) >> 2;
5616         pipe_id = (entry->ring_id & 0x03) >> 0;
5617         queue_id = (entry->ring_id & 0x70) >> 4;
5618
5619         switch (me_id) {
5620         case 0:
5621                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5622                 break;
5623         case 1:
5624         case 2:
5625                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5626                         ring = &adev->gfx.compute_ring[i];
5627                         if (ring->me == me_id && ring->pipe == pipe_id &&
5628                             ring->queue == queue_id)
5629                                 drm_sched_fault(&ring->sched);
5630                 }
5631                 break;
5632         }
5633 }
5634
5635 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5636                                  struct amdgpu_irq_src *source,
5637                                  struct amdgpu_iv_entry *entry)
5638 {
5639         DRM_ERROR("Illegal register access in command stream\n");
5640         gfx_v9_0_fault(adev, entry);
5641         return 0;
5642 }
5643
5644 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5645                                   struct amdgpu_irq_src *source,
5646                                   struct amdgpu_iv_entry *entry)
5647 {
5648         DRM_ERROR("Illegal instruction in command stream\n");
5649         gfx_v9_0_fault(adev, entry);
5650         return 0;
5651 }
5652
5653 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5654                 struct ras_err_data *err_data,
5655                 struct amdgpu_iv_entry *entry)
5656 {
5657         /* TODO: an uncorrectable error (UE) will trigger an interrupt. */
5658         kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5659         if (adev->gfx.funcs->query_ras_error_count)
5660                 adev->gfx.funcs->query_ras_error_count(adev, err_data);
5661         amdgpu_ras_reset_gpu(adev, 0);
5662         return AMDGPU_RAS_SUCCESS;
5663 }
5664
5665 static const struct {
5666         const char *name;
5667         uint32_t ip;
5668         uint32_t inst;
5669         uint32_t seg;
5670         uint32_t reg_offset;
5671         uint32_t per_se_instance;
5672         int32_t num_instance;
5673         uint32_t sec_count_mask;
5674         uint32_t ded_count_mask;
5675 } gfx_ras_edc_regs[] = {
5676         { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1,
5677           REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5678           REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, DED_COUNT) },
5679         { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1,
5680           REG_FIELD_MASK(CPC_EDC_UCODE_CNT, SEC_COUNT),
5681           REG_FIELD_MASK(CPC_EDC_UCODE_CNT, DED_COUNT) },
5682         { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5683           REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME1), 0 },
5684         { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5685           REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME2), 0 },
5686         { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1,
5687           REG_FIELD_MASK(CPF_EDC_TAG_CNT, SEC_COUNT),
5688           REG_FIELD_MASK(CPF_EDC_TAG_CNT, DED_COUNT) },
5689         { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5690           REG_FIELD_MASK(CPG_EDC_DMA_CNT, ROQ_COUNT), 0 },
5691         { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5692           REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5693           REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_DED_COUNT) },
5694         { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1,
5695           REG_FIELD_MASK(CPG_EDC_TAG_CNT, SEC_COUNT),
5696           REG_FIELD_MASK(CPG_EDC_TAG_CNT, DED_COUNT) },
5697         { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1,
5698           REG_FIELD_MASK(DC_EDC_CSINVOC_CNT, COUNT_ME1), 0 },
5699         { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1,
5700           REG_FIELD_MASK(DC_EDC_RESTORE_CNT, COUNT_ME1), 0 },
5701         { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1,
5702           REG_FIELD_MASK(DC_EDC_STATE_CNT, COUNT_ME1), 0 },
5703         { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5704           REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_SEC),
5705           REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_DED) },
5706         { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5707           REG_FIELD_MASK(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 0 },
5708         { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5709           0, 1, REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5710           REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) },
5711         { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5712           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5713           REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5714           REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) },
5715         { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5716           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5717           REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 0 },
5718         { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5719           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5720           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5721           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) },
5722         { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5723           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5724           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5725           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) },
5726         { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5727           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5728           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5729           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) },
5730         { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5731           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5732           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5733           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) },
5734         { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 1, 1,
5735           REG_FIELD_MASK(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 0 },
5736         { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5737           REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5738           REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) },
5739         { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5740           REG_FIELD_MASK(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 0 },
5741         { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5742           REG_FIELD_MASK(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 0 },
5743         { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5744           REG_FIELD_MASK(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 0 },
5745         { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5746           REG_FIELD_MASK(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 0 },
5747         { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5748           REG_FIELD_MASK(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 0 },
5749         { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5750           REG_FIELD_MASK(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 0 },
5751         { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5752           REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5753           REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) },
5754         { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5755           REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5756           REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) },
5757         { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5758           REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5759           REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) },
5760         { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5761           REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5762           REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) },
5763         { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5764           REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5765           REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) },
5766         { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5767           REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 0 },
5768         { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5769           REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 0 },
5770         { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5771           REG_FIELD_MASK(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 0 },
5772         { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5773           REG_FIELD_MASK(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 0 },
5774         { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5775           REG_FIELD_MASK(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 0 },
5776         { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5777           REG_FIELD_MASK(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 0 },
5778         { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5779           REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 0 },
5780         { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5781           REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 0 },
5782         { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5783           16, REG_FIELD_MASK(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 0 },
5784         { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5785           0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5786           0 },
5787         { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5788           16, REG_FIELD_MASK(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 0 },
5789         { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5790           0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5791           0 },
5792         { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5793           16, REG_FIELD_MASK(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 0 },
5794         { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 72,
5795           REG_FIELD_MASK(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 0 },
5796         { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5797           REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5798           REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) },
5799         { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5800           REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5801           REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) },
5802         { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5803           REG_FIELD_MASK(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 0 },
5804         { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5805           REG_FIELD_MASK(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 0 },
5806         { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5807           REG_FIELD_MASK(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0 },
5808         { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5809           REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5810           REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) },
5811         { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5812           REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5813           REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) },
5814         { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5815           REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5816           REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) },
5817         { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5818           REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5819           REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) },
5820         { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5821           REG_FIELD_MASK(TD_EDC_CNT, CS_FIFO_SED_COUNT), 0 },
5822         { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5823           REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5824           REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_DED_COUNT) },
5825         { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5826           REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5827           REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_DED_COUNT) },
5828         { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5829           REG_FIELD_MASK(SQ_EDC_CNT, SGPR_SEC_COUNT),
5830           REG_FIELD_MASK(SQ_EDC_CNT, SGPR_DED_COUNT) },
5831         { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5832           REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5833           REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_DED_COUNT) },
5834         { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5835           REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5836           REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_DED_COUNT) },
5837         { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5838           REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5839           REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_DED_COUNT) },
5840         { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5841           REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5842           REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_DED_COUNT) },
5843         { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5844           1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5845           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) },
5846         { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5847           6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5848           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) },
5849         { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5850           1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5851           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) },
5852         { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5853           6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5854           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) },
5855         { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5856           1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5857           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) },
5858         { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5859           6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5860           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) },
5861         { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5862           6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5863           REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) },
5864         { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5865           6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5866           REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) },
5867         { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5868           6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5869           REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) },
5870         { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5871           6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5872           REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) },
5873         { "SQC_INST_BANKA_UTCL1_MISS_FIFO",
5874           SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5875           REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5876           0 },
5877         { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5878           6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 0 },
5879         { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5880           6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 0 },
5881         { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5882           6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 0 },
5883         { "SQC_DATA_BANKA_DIRTY_BIT_RAM",
5884           SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5885           REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 0 },
5886         { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5887           REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
5888           REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) },
5889         { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5890           6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
5891           REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) },
5892         { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5893           6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
5894           REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) },
5895         { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5896           6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
5897           REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) },
5898         { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5899           6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
5900           REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) },
5901         { "SQC_INST_BANKB_UTCL1_MISS_FIFO",
5902           SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
5903           REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
5904           0 },
5905         { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5906           6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 0 },
5907         { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5908           6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 0 },
5909         { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5910           6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 0 },
5911         { "SQC_DATA_BANKB_DIRTY_BIT_RAM",
5912           SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
5913           REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 0 },
5914         { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5915           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
5916           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) },
5917         { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5918           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
5919           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) },
5920         { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5921           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
5922           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) },
5923         { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5924           REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
5925           REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) },
5926         { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5927           REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
5928           REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) },
5929         { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5930           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0 },
5931         { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5932           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0 },
5933         { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5934           REG_FIELD_MASK(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0 },
5935         { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5936           REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0 },
5937         { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5938           REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0 },
5939         { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5940           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
5941           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) },
5942         { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5943           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
5944           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) },
5945         { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5946           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
5947           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) },
5948         { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5949           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0 },
5950         { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5951           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0 },
5952         { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5953           REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 0 },
5954         { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5955           REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 0 },
5956         { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5957           REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 0 },
5958         { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5959           REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 0 },
5960 };
5961
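/*
 * gfx_v9_0_ras_error_inject - inject a RAS error into a GFX sub-block
 *
 * Validates that the requested sub-block exists and that both the hardware
 * and the driver support the requested error type, then translates the
 * request into a ta_ras_trigger_error_input and forwards it to the PSP RAS
 * TA via psp_ras_trigger_error().  Only supported on VEGA20.
 */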
5962 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
5963                                      void *inject_if)
5964 {
5965         struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
5966         int ret;
5967         struct ta_ras_trigger_error_input block_info = { 0 };
5968
5969         if (adev->asic_type != CHIP_VEGA20)
5970                 return -EINVAL;
5971
5972         if (!ras_gfx_subblocks[info->head.sub_block_index].name)
5973                 return -EPERM;
5974
5975         if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
5976               info->head.type)) {
5977                 DRM_ERROR("GFX Subblock %s, hardware does not support error type 0x%x\n",
5978                         ras_gfx_subblocks[info->head.sub_block_index].name,
5979                         info->head.type);
5980                 return -EPERM;
5981         }
5982
5983         if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
5984               info->head.type)) {
5985                 DRM_ERROR("GFX Subblock %s, driver does not support error type 0x%x\n",
5986                         ras_gfx_subblocks[info->head.sub_block_index].name,
5987                         info->head.type);
5988                 return -EPERM;
5989         }
5990
5991         block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
5992         block_info.sub_block_index =
5993                 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
5994         block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
5995         block_info.address = info->address;
5996         block_info.value = info->value;
5997
5998         mutex_lock(&adev->grbm_idx_mutex);
5999         ret = psp_ras_trigger_error(&adev->psp, &block_info);
6000         mutex_unlock(&adev->grbm_idx_mutex);
6001
6002         return ret;
6003 }
6004
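/*
 * gfx_v9_0_query_ras_error_count - read back the GFX EDC error counters
 *
 * Walks the gfx_ras_edc_regs table for every shader engine and instance,
 * reads each EDC count register and extracts the SEC (single-error
 * corrected) and DED (double-error detected) fields.  SEC hits are
 * accumulated into err_data->ce_count, DED hits into err_data->ue_count.
 * Only supported on VEGA20.
 */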
6005 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6006                                           void *ras_error_status)
6007 {
6008         struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6009         uint32_t sec_count, ded_count;
6010         uint32_t i;
6011         uint32_t reg_value;
6012         uint32_t se_id, instance_id;
6013
6014         if (adev->asic_type != CHIP_VEGA20)
6015                 return -EINVAL;
6016
6017         err_data->ue_count = 0;
6018         err_data->ce_count = 0;
6019
6020         mutex_lock(&adev->grbm_idx_mutex);
6021         for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) {
6022                 for (instance_id = 0; instance_id < 256; instance_id++) {
6023                         for (i = 0; i < ARRAY_SIZE(gfx_ras_edc_regs); i++) {
6026                                 if (se_id != 0 &&
6027                                     !gfx_ras_edc_regs[i].per_se_instance)
6028                                         continue;
6029                                 if (instance_id >= gfx_ras_edc_regs[i].num_instance)
6030                                         continue;
6031
6032                                 gfx_v9_0_select_se_sh(adev, se_id, 0,
6033                                                       instance_id);
6034
6035                                 reg_value = RREG32(
6036                                         adev->reg_offset[gfx_ras_edc_regs[i].ip]
6037                                                         [gfx_ras_edc_regs[i].inst]
6038                                                         [gfx_ras_edc_regs[i].seg] +
6039                                         gfx_ras_edc_regs[i].reg_offset);
6040                                 sec_count = reg_value &
6041                                             gfx_ras_edc_regs[i].sec_count_mask;
6042                                 ded_count = reg_value &
6043                                             gfx_ras_edc_regs[i].ded_count_mask;
6044                                 if (sec_count) {
6045                                         DRM_INFO(
6046                                                 "Instance[%d][%d]: SubBlock %s, SEC %d\n",
6047                                                 se_id, instance_id,
6048                                                 gfx_ras_edc_regs[i].name,
6049                                                 sec_count);
6050                                         err_data->ce_count++;
6051                                 }
6052
6053                                 if (ded_count) {
6054                                         DRM_INFO(
6055                                                 "Instance[%d][%d]: SubBlock %s, DED %d\n",
6056                                                 se_id, instance_id,
6057                                                 gfx_ras_edc_regs[i].name,
6058                                                 ded_count);
6059                                         err_data->ue_count++;
6060                                 }
6061                         }
6062                 }
6063         }
6064         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6065         mutex_unlock(&adev->grbm_idx_mutex);
6066
6067         return 0;
6068 }
6069
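/*
 * CP ECC error interrupt handler: log the event and forward the IV entry
 * to the common RAS interrupt dispatcher.
 */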
6070 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
6071                                   struct amdgpu_irq_src *source,
6072                                   struct amdgpu_iv_entry *entry)
6073 {
6074         struct ras_common_if *ras_if = adev->gfx.ras_if;
6075         struct ras_dispatch_if ih_data = {
6076                 .entry = entry,
6077         };
6078
6079         if (!ras_if)
6080                 return 0;
6081
6082         ih_data.head = *ras_if;
6083
6084         DRM_ERROR("CP ECC ERROR IRQ\n");
6085         amdgpu_ras_interrupt_dispatch(adev, &ih_data);
6086         return 0;
6087 }
6088
6089 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6090         .name = "gfx_v9_0",
6091         .early_init = gfx_v9_0_early_init,
6092         .late_init = gfx_v9_0_late_init,
6093         .sw_init = gfx_v9_0_sw_init,
6094         .sw_fini = gfx_v9_0_sw_fini,
6095         .hw_init = gfx_v9_0_hw_init,
6096         .hw_fini = gfx_v9_0_hw_fini,
6097         .suspend = gfx_v9_0_suspend,
6098         .resume = gfx_v9_0_resume,
6099         .is_idle = gfx_v9_0_is_idle,
6100         .wait_for_idle = gfx_v9_0_wait_for_idle,
6101         .soft_reset = gfx_v9_0_soft_reset,
6102         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
6103         .set_powergating_state = gfx_v9_0_set_powergating_state,
6104         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
6105 };
6106
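/*
 * Ring callbacks for the GFX ring.  emit_frame_size is the worst-case
 * number of dwords emitted per submission outside of the IBs themselves.
 */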
6107 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6108         .type = AMDGPU_RING_TYPE_GFX,
6109         .align_mask = 0xff,
6110         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6111         .support_64bit_ptrs = true,
6112         .vmhub = AMDGPU_GFXHUB_0,
6113         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6114         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6115         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6116         .emit_frame_size = /* 242 maximum in total if 16 IBs */
6117                 5 +  /* COND_EXEC */
6118                 7 +  /* PIPELINE_SYNC */
6119                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6120                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6121                 2 + /* VM_FLUSH */
6122                 8 +  /* FENCE for VM_FLUSH */
6123                 20 + /* GDS switch */
6124                 4 + /* double SWITCH_BUFFER,
6125                        the first COND_EXEC jumps to the place just
6126                        prior to this double SWITCH_BUFFER */
6127                 5 + /* COND_EXEC */
6128                 7 + /* HDP_flush */
6129                 4 + /* VGT_flush */
6130                 14 + /* CE_META */
6131                 31 + /* DE_META */
6132                 3 + /* CNTX_CTRL */
6133                 5 + /* HDP_INVL */
6134                 8 + 8 + /* FENCE x2 */
6135                 2, /* SWITCH_BUFFER */
6136         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
6137         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6138         .emit_fence = gfx_v9_0_ring_emit_fence,
6139         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6140         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6141         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6142         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6143         .test_ring = gfx_v9_0_ring_test_ring,
6144         .test_ib = gfx_v9_0_ring_test_ib,
6145         .insert_nop = amdgpu_ring_insert_nop,
6146         .pad_ib = amdgpu_ring_generic_pad_ib,
6147         .emit_switch_buffer = gfx_v9_ring_emit_sb,
6148         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6149         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6150         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6151         .emit_tmz = gfx_v9_0_ring_emit_tmz,
6152         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6153         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6154         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6155         .soft_recovery = gfx_v9_0_ring_soft_recovery,
6156 };
6157
6158 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6159         .type = AMDGPU_RING_TYPE_COMPUTE,
6160         .align_mask = 0xff,
6161         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6162         .support_64bit_ptrs = true,
6163         .vmhub = AMDGPU_GFXHUB_0,
6164         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6165         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6166         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6167         .emit_frame_size =
6168                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6169                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6170                 5 + /* hdp invalidate */
6171                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6172                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6173                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6174                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6175                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6176         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6177         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
6178         .emit_fence = gfx_v9_0_ring_emit_fence,
6179         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6180         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6181         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6182         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6183         .test_ring = gfx_v9_0_ring_test_ring,
6184         .test_ib = gfx_v9_0_ring_test_ib,
6185         .insert_nop = amdgpu_ring_insert_nop,
6186         .pad_ib = amdgpu_ring_generic_pad_ib,
6187         .set_priority = gfx_v9_0_ring_set_priority_compute,
6188         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6189         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6190         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6191 };
6192
6193 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6194         .type = AMDGPU_RING_TYPE_KIQ,
6195         .align_mask = 0xff,
6196         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6197         .support_64bit_ptrs = true,
6198         .vmhub = AMDGPU_GFXHUB_0,
6199         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6200         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6201         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6202         .emit_frame_size =
6203                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6204                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6205                 5 + /* hdp invalidate */
6206                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6207                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6208                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6209                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6210                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6211         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6212         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6213         .test_ring = gfx_v9_0_ring_test_ring,
6214         .insert_nop = amdgpu_ring_insert_nop,
6215         .pad_ib = amdgpu_ring_generic_pad_ib,
6216         .emit_rreg = gfx_v9_0_ring_emit_rreg,
6217         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6218         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6219         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6220 };
6221
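/* Attach the KIQ, GFX and compute rings to their ring function tables. */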
6222 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6223 {
6224         int i;
6225
6226         adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6227
6228         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6229                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6230
6231         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6232                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6233 }
6234
6235 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6236         .set = gfx_v9_0_set_eop_interrupt_state,
6237         .process = gfx_v9_0_eop_irq,
6238 };
6239
6240 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6241         .set = gfx_v9_0_set_priv_reg_fault_state,
6242         .process = gfx_v9_0_priv_reg_irq,
6243 };
6244
6245 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6246         .set = gfx_v9_0_set_priv_inst_fault_state,
6247         .process = gfx_v9_0_priv_inst_irq,
6248 };
6249
6250 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6251         .set = gfx_v9_0_set_cp_ecc_error_state,
6252         .process = gfx_v9_0_cp_ecc_error_irq,
6253 };
6254
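/*
 * Wire up the GFX interrupt sources: end-of-pipe, privileged register
 * faults, privileged instruction faults and CP ECC errors.
 */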
6256 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6257 {
6258         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6259         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6260
6261         adev->gfx.priv_reg_irq.num_types = 1;
6262         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6263
6264         adev->gfx.priv_inst_irq.num_types = 1;
6265         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6266
6267         adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
6268         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6269 }
6270
6271 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6272 {
6273         switch (adev->asic_type) {
6274         case CHIP_VEGA10:
6275         case CHIP_VEGA12:
6276         case CHIP_VEGA20:
6277         case CHIP_RAVEN:
6278         case CHIP_ARCTURUS:
6279                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6280                 break;
6281         default:
6282                 break;
6283         }
6284 }
6285
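/*
 * Initialize the per-ASIC GDS (Global Data Share) configuration: GDS size,
 * compute GDS max wave id, and the GWS and OA resource sizes.
 */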
6286 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6287 {
6288         /* init asic gds info */
6289         switch (adev->asic_type) {
6290         case CHIP_VEGA10:
6291         case CHIP_VEGA12:
6292         case CHIP_VEGA20:
6293                 adev->gds.gds_size = 0x10000;
6294                 break;
6295         case CHIP_RAVEN:
6296         case CHIP_ARCTURUS:
6297                 adev->gds.gds_size = 0x1000;
6298                 break;
6299         default:
6300                 adev->gds.gds_size = 0x10000;
6301                 break;
6302         }
6303
6304         switch (adev->asic_type) {
6305         case CHIP_VEGA10:
6306         case CHIP_VEGA20:
6307                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6308                 break;
6309         case CHIP_VEGA12:
6310                 adev->gds.gds_compute_max_wave_id = 0x27f;
6311                 break;
6312         case CHIP_RAVEN:
6313                 if (adev->rev_id >= 0x8)
6314                         adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6315                 else
6316                         adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6317                 break;
6318         case CHIP_ARCTURUS:
6319                 adev->gds.gds_compute_max_wave_id = 0xfff;
6320                 break;
6321         default:
6322                 /* this really depends on the chip */
6323                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6324                 break;
6325         }
6326
6327         adev->gds.gws_size = 64;
6328         adev->gds.oa_size = 16;
6329 }
6330
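/*
 * Apply a user-requested CU disable mask to the currently selected SE/SH
 * by writing GC_USER_SHADER_ARRAY_CONFIG; a zero bitmap is a no-op.
 */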
6331 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6332                                                  u32 bitmap)
6333 {
6334         u32 data;
6335
6336         if (!bitmap)
6337                 return;
6338
6339         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6340         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6341
6342         WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6343 }
6344
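/*
 * Return the active-CU bitmap of the currently selected SE/SH: the union
 * of the fused and user-set inactive-CU masks, inverted and clamped to
 * max_cu_per_sh bits.
 */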
6345 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6346 {
6347         u32 data, mask;
6348
6349         data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6350         data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6351
6352         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6353         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6354
6355         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6356
6357         return (~data) & mask;
6358 }
6359
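/*
 * gfx_v9_0_get_cu_info - gather the CU topology for all shader engines
 *
 * For each SE/SH pair, apply the parsed CU disable mask, read back the
 * active-CU bitmap, count the active CUs and build the always-on (AO) CU
 * mask.  Results are returned through @cu_info.
 */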
6360 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6361                                  struct amdgpu_cu_info *cu_info)
6362 {
6363         int i, j, k, counter, active_cu_number = 0;
6364         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6365         unsigned disable_masks[4 * 4];
6366
6367         if (!adev || !cu_info)
6368                 return -EINVAL;
6369
6370         /*
6371          * 16 comes from the 4*4 bitmap array size, which covers all gfx9 ASICs
6372          */
6373         if (adev->gfx.config.max_shader_engines *
6374                 adev->gfx.config.max_sh_per_se > 16)
6375                 return -EINVAL;
6376
6377         amdgpu_gfx_parse_disable_cu(disable_masks,
6378                                     adev->gfx.config.max_shader_engines,
6379                                     adev->gfx.config.max_sh_per_se);
6380
6381         mutex_lock(&adev->grbm_idx_mutex);
6382         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6383                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6384                         mask = 1;
6385                         ao_bitmap = 0;
6386                         counter = 0;
6387                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6388                         gfx_v9_0_set_user_cu_inactive_bitmap(
6389                                 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6390                         bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6391
6392                         /*
6393                          * The bitmap (and ao_cu_bitmap) in the cu_info structure
6394                          * is a 4x4 array, which is suitable for the Vega ASICs
6395                          * with their 4*2 SE/SH layout.
6396                          * Arcturus, however, uses an 8*1 SE/SH layout.
6397                          * To minimize the impact, we map it onto the existing
6398                          * bitmap array as below:
6399                          *    SE4,SH0 --> bitmap[0][1]
6400                          *    SE5,SH0 --> bitmap[1][1]
6401                          *    SE6,SH0 --> bitmap[2][1]
6402                          *    SE7,SH0 --> bitmap[3][1]
6403                          */
6404                         cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6405
6406                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6407                                 if (bitmap & mask) {
6408                                         if (counter < adev->gfx.config.max_cu_per_sh)
6409                                                 ao_bitmap |= mask;
6410                                         counter++;
6411                                 }
6412                                 mask <<= 1;
6413                         }
6414                         active_cu_number += counter;
6415                         if (i < 2 && j < 2)
6416                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6417                         cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6418                 }
6419         }
6420         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6421         mutex_unlock(&adev->grbm_idx_mutex);
6422
6423         cu_info->number = active_cu_number;
6424         cu_info->ao_cu_mask = ao_cu_mask;
6425         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6426
6427         return 0;
6428 }
6429
6430 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6431 {
6432         .type = AMD_IP_BLOCK_TYPE_GFX,
6433         .major = 9,
6434         .minor = 0,
6435         .rev = 0,
6436         .funcs = &gfx_v9_0_ip_funcs,
6437 };