/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmPWR_MISC_CNTL_STATUS                                  0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX                         0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT        0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT          0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK          0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK            0x00000006L

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
                                struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);

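/*
 * Program the per-ASIC "golden" register settings: register/mask/value
 * triples that override the hardware defaults for each chip (and, on
 * Raven, for each silicon revision), followed by the settings common to
 * all GFX9 parts.
 */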
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_VEGA10:
                if (!amdgpu_virt_support_skip_setting(adev)) {
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_0,
                                                        ARRAY_SIZE(golden_settings_gc_9_0));
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_0_vg10,
                                                        ARRAY_SIZE(golden_settings_gc_9_0_vg10));
                }
                break;
        case CHIP_VEGA12:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_2_1,
                                                ARRAY_SIZE(golden_settings_gc_9_2_1));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_2_1_vg12,
                                                ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
                break;
        case CHIP_VEGA20:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0,
                                                ARRAY_SIZE(golden_settings_gc_9_0));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0_vg20,
                                                ARRAY_SIZE(golden_settings_gc_9_0_vg20));
                break;
        case CHIP_RAVEN:
                soc15_program_register_sequence(adev, golden_settings_gc_9_1,
                                                ARRAY_SIZE(golden_settings_gc_9_1));
                if (adev->rev_id >= 8)
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_1_rv2,
                                                        ARRAY_SIZE(golden_settings_gc_9_1_rv2));
                else
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_1_rv1,
                                                        ARRAY_SIZE(golden_settings_gc_9_1_rv1));
                break;
        default:
                break;
        }

        soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
                                        (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
        adev->gfx.scratch.num_reg = 8;
        adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
        adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

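/*
 * Emit a WRITE_DATA packet that writes @val to register @reg through the
 * selected engine, optionally requesting a write confirmation (@wc).
 */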
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
                                       bool wc, uint32_t reg, uint32_t val)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
                                WRITE_DATA_DST_SEL(0) |
                                (wc ? WR_CONFIRM : 0));
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, val);
}

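/*
 * Emit a WAIT_REG_MEM packet that polls a register (mem_space = 0) or a
 * dword-aligned memory location (mem_space = 1) until the masked value
 * equals @ref, rechecking every @inv poll intervals.
 */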
static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
                                  int mem_space, int opt, uint32_t addr0,
                                  uint32_t addr1, uint32_t ref, uint32_t mask,
                                  uint32_t inv)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring,
                                 /* memory (1) or register (0) */
                                 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
                                 WAIT_REG_MEM_OPERATION(opt) | /* wait */
                                 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
                                 WAIT_REG_MEM_ENGINE(eng_sel)));

        if (mem_space)
                BUG_ON(addr0 & 0x3); /* Dword align */
        amdgpu_ring_write(ring, addr0);
        amdgpu_ring_write(ring, addr1);
        amdgpu_ring_write(ring, ref);
        amdgpu_ring_write(ring, mask);
        amdgpu_ring_write(ring, inv); /* poll interval */
}

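/*
 * Basic ring test: push a token into a scratch register through the ring
 * and poll until the value lands or the timeout expires.
 */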
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        uint32_t scratch;
        uint32_t tmp = 0;
        unsigned i;
        int r;

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r)
                return r;

        WREG32(scratch, 0xCAFEDEAD);
        r = amdgpu_ring_alloc(ring, 3);
        if (r)
                goto error_free_scratch;

        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
        amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
        amdgpu_ring_write(ring, 0xDEADBEEF);
        amdgpu_ring_commit(ring);

        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
                        break;
                udelay(1);
        }

        if (i >= adev->usec_timeout)
                r = -ETIMEDOUT;

error_free_scratch:
        amdgpu_gfx_scratch_free(adev, scratch);
        return r;
}

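/*
 * Indirect buffer test: submit a small IB that writes a token to a
 * writeback slot, wait on its fence and check that the token arrived.
 */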
static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_ib ib;
        struct dma_fence *f = NULL;
        unsigned index;
        uint64_t gpu_addr;
        uint32_t tmp;
        long r;

        r = amdgpu_device_wb_get(adev, &index);
        if (r)
                return r;

        gpu_addr = adev->wb.gpu_addr + (index * 4);
        adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
        memset(&ib, 0, sizeof(ib));
        r = amdgpu_ib_get(adev, NULL, 16, &ib);
        if (r)
                goto err1;

        ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
        ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
        ib.ptr[2] = lower_32_bits(gpu_addr);
        ib.ptr[3] = upper_32_bits(gpu_addr);
        ib.ptr[4] = 0xDEADBEEF;
        ib.length_dw = 5;

        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
        if (r)
                goto err2;

        r = dma_fence_wait_timeout(f, false, timeout);
        if (r == 0) {
                r = -ETIMEDOUT;
                goto err2;
        } else if (r < 0) {
                goto err2;
        }

        tmp = adev->wb.wb[index];
        if (tmp == 0xDEADBEEF)
                r = 0;
        else
                r = -EINVAL;

err2:
        amdgpu_ib_free(adev, &ib, NULL);
        dma_fence_put(f);
err1:
        amdgpu_device_wb_free(adev, index);
        return r;
}

static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
        release_firmware(adev->gfx.pfp_fw);
        adev->gfx.pfp_fw = NULL;
        release_firmware(adev->gfx.me_fw);
        adev->gfx.me_fw = NULL;
        release_firmware(adev->gfx.ce_fw);
        adev->gfx.ce_fw = NULL;
        release_firmware(adev->gfx.rlc_fw);
        adev->gfx.rlc_fw = NULL;
        release_firmware(adev->gfx.mec_fw);
        adev->gfx.mec_fw = NULL;
        release_firmware(adev->gfx.mec2_fw);
        adev->gfx.mec2_fw = NULL;

        kfree(adev->gfx.rlc.register_list_format);
}

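/*
 * Parse the v2.1 RLC firmware header extensions: versions, sizes and
 * payload pointers for the save/restore list CNTL, GPM and SRM blobs.
 */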
static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
        const struct rlc_firmware_header_v2_1 *rlc_hdr;

        rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
        adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
        adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
        adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
        adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
        adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
        adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
        adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
        adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
        adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
        adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
        adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
        adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
        adev->gfx.rlc.reg_list_format_direct_reg_list_length =
                        le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}

static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
        adev->gfx.me_fw_write_wait = false;
        adev->gfx.mec_fw_write_wait = false;

        switch (adev->asic_type) {
        case CHIP_VEGA10:
                if ((adev->gfx.me_fw_version >= 0x0000009c) &&
                    (adev->gfx.me_feature_version >= 42) &&
                    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
                    (adev->gfx.pfp_feature_version >= 42))
                        adev->gfx.me_fw_write_wait = true;

                if ((adev->gfx.mec_fw_version >= 0x00000193) &&
                    (adev->gfx.mec_feature_version >= 42))
                        adev->gfx.mec_fw_write_wait = true;
                break;
        case CHIP_VEGA12:
                if ((adev->gfx.me_fw_version >= 0x0000009c) &&
                    (adev->gfx.me_feature_version >= 44) &&
                    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
                    (adev->gfx.pfp_feature_version >= 44))
                        adev->gfx.me_fw_write_wait = true;

                if ((adev->gfx.mec_fw_version >= 0x00000196) &&
                    (adev->gfx.mec_feature_version >= 44))
                        adev->gfx.mec_fw_write_wait = true;
                break;
        case CHIP_VEGA20:
                if ((adev->gfx.me_fw_version >= 0x0000009c) &&
                    (adev->gfx.me_feature_version >= 44) &&
                    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
                    (adev->gfx.pfp_feature_version >= 44))
                        adev->gfx.me_fw_write_wait = true;

                if ((adev->gfx.mec_fw_version >= 0x00000197) &&
                    (adev->gfx.mec_feature_version >= 44))
                        adev->gfx.mec_fw_write_wait = true;
                break;
        case CHIP_RAVEN:
                if ((adev->gfx.me_fw_version >= 0x0000009c) &&
                    (adev->gfx.me_feature_version >= 42) &&
                    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
                    (adev->gfx.pfp_feature_version >= 42))
                        adev->gfx.me_fw_write_wait = true;

                if ((adev->gfx.mec_fw_version >= 0x00000192) &&
                    (adev->gfx.mec_feature_version >= 42))
                        adev->gfx.mec_fw_write_wait = true;
                break;
        default:
                break;
        }
}

static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_VEGA10:
        case CHIP_VEGA12:
        case CHIP_VEGA20:
                break;
        case CHIP_RAVEN:
                if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
                        break;
                if ((adev->gfx.rlc_fw_version != 106 &&
                     adev->gfx.rlc_fw_version < 531) ||
                    (adev->gfx.rlc_fw_version == 53815) ||
                    (adev->gfx.rlc_feature_version < 1) ||
                    !adev->gfx.rlc.is_rlc_v2_1)
                        adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
                break;
        default:
                break;
        }
}

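/*
 * Request and validate all GFX firmware images (PFP, ME, CE, RLC, MEC
 * and, when present, MEC2) for the detected ASIC, then register them in
 * the ucode table when firmware loading goes through the PSP.
 */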
static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
{
        const char *chip_name;
        char fw_name[30];
        int err;
        struct amdgpu_firmware_info *info = NULL;
        const struct common_firmware_header *header = NULL;
        const struct gfx_firmware_header_v1_0 *cp_hdr;
        const struct rlc_firmware_header_v2_0 *rlc_hdr;
        unsigned int *tmp = NULL;
        unsigned int i = 0;
        uint16_t version_major;
        uint16_t version_minor;
        uint32_t smu_version;

        DRM_DEBUG("\n");

        switch (adev->asic_type) {
        case CHIP_VEGA10:
                chip_name = "vega10";
                break;
        case CHIP_VEGA12:
                chip_name = "vega12";
                break;
        case CHIP_VEGA20:
                chip_name = "vega20";
                break;
        case CHIP_RAVEN:
                if (adev->rev_id >= 8)
                        chip_name = "raven2";
                else if (adev->pdev->device == 0x15d8)
                        chip_name = "picasso";
                else
                        chip_name = "raven";
                break;
        default:
                BUG();
        }

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
        err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
        adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
        err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.me_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
        adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
        err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.ce_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
        adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        /*
         * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
         * instead of picasso_rlc.bin.
         * Judgment method:
         * PCO AM4: revision >= 0xC8 && revision <= 0xCF
         *          or revision >= 0xD8 && revision <= 0xDF
         * otherwise it is PCO FP5
         */
        if (!strcmp(chip_name, "picasso") &&
                (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
                ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
        else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
                (smu_version >= 0x41e2b))
                /*
                 * SMC is loaded by SBIOS on APU and it's able to get the
                 * SMU version directly.
                 */
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
        else
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
        err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
        if (err)
                goto out;
        rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;

        version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
        version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
        if (version_major == 2 && version_minor == 1)
                adev->gfx.rlc.is_rlc_v2_1 = true;

        adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
        adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
        adev->gfx.rlc.save_and_restore_offset =
                        le32_to_cpu(rlc_hdr->save_and_restore_offset);
        adev->gfx.rlc.clear_state_descriptor_offset =
                        le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
        adev->gfx.rlc.avail_scratch_ram_locations =
                        le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
        adev->gfx.rlc.reg_restore_list_size =
                        le32_to_cpu(rlc_hdr->reg_restore_list_size);
        adev->gfx.rlc.reg_list_format_start =
                        le32_to_cpu(rlc_hdr->reg_list_format_start);
        adev->gfx.rlc.reg_list_format_separate_start =
                        le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
        adev->gfx.rlc.starting_offsets_start =
                        le32_to_cpu(rlc_hdr->starting_offsets_start);
        adev->gfx.rlc.reg_list_format_size_bytes =
                        le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
        adev->gfx.rlc.reg_list_size_bytes =
                        le32_to_cpu(rlc_hdr->reg_list_size_bytes);
        adev->gfx.rlc.register_list_format =
                        kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
                                adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
        if (!adev->gfx.rlc.register_list_format) {
                err = -ENOMEM;
                goto out;
        }

        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
                        le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
        for (i = 0; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
                adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

        adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
                        le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
        for (i = 0; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
                adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

        if (adev->gfx.rlc.is_rlc_v2_1)
                gfx_v9_0_init_rlc_ext_microcode(adev);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
        err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.mec_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
        adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
        err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
        if (!err) {
                err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
                if (err)
                        goto out;
                cp_hdr = (const struct gfx_firmware_header_v1_0 *)
                        adev->gfx.mec2_fw->data;
                adev->gfx.mec2_fw_version =
                        le32_to_cpu(cp_hdr->header.ucode_version);
                adev->gfx.mec2_feature_version =
                        le32_to_cpu(cp_hdr->ucode_feature_version);
        } else {
                err = 0;
                adev->gfx.mec2_fw = NULL;
        }

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
                info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
                info->fw = adev->gfx.pfp_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
                info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
                info->fw = adev->gfx.me_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
                info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
                info->fw = adev->gfx.ce_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
                info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
                info->fw = adev->gfx.rlc_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                if (adev->gfx.rlc.is_rlc_v2_1 &&
                    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
                    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
                    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
                        info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
                        info->fw = adev->gfx.rlc_fw;
                        adev->firmware.fw_size +=
                                ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);

                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
                        info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
                        info->fw = adev->gfx.rlc_fw;
                        adev->firmware.fw_size +=
                                ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);

                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
                        info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
                        info->fw = adev->gfx.rlc_fw;
                        adev->firmware.fw_size +=
                                ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
                }

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
                info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
                info->fw = adev->gfx.mec_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
                info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
                info->fw = adev->gfx.mec_fw;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

                if (adev->gfx.mec2_fw) {
                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
                        info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
                        info->fw = adev->gfx.mec2_fw;
                        header = (const struct common_firmware_header *)info->fw->data;
                        cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
                        adev->firmware.fw_size +=
                                ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
                        info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
                        info->fw = adev->gfx.mec2_fw;
                        adev->firmware.fw_size +=
                                ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
                }
        }

out:
        gfx_v9_0_check_if_need_gfxoff(adev);
        gfx_v9_0_check_fw_write_wait(adev);
        if (err) {
                dev_err(adev->dev,
                        "gfx9: Failed to load firmware \"%s\"\n",
                        fw_name);
                release_firmware(adev->gfx.pfp_fw);
                adev->gfx.pfp_fw = NULL;
                release_firmware(adev->gfx.me_fw);
                adev->gfx.me_fw = NULL;
                release_firmware(adev->gfx.ce_fw);
                adev->gfx.ce_fw = NULL;
                release_firmware(adev->gfx.rlc_fw);
                adev->gfx.rlc_fw = NULL;
                release_firmware(adev->gfx.mec_fw);
                adev->gfx.mec_fw = NULL;
                release_firmware(adev->gfx.mec2_fw);
                adev->gfx.mec2_fw = NULL;
        }
        return err;
}

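/* Size of the clear-state buffer, in dwords, derived from gfx9_cs_data. */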
static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
{
        u32 count = 0;
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;

        /* begin clear state */
        count += 2;
        /* context control state */
        count += 3;

        for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT)
                                count += 2 + ext->reg_count;
                        else
                                return 0;
                }
        }

        /* end clear state */
        count += 2;
        /* clear state */
        count += 2;

        return count;
}

static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
                                    volatile u32 *buffer)
{
        u32 count = 0, i;
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;

        if (adev->gfx.rlc.cs_data == NULL)
                return;
        if (buffer == NULL)
                return;

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        buffer[count++] = cpu_to_le32(0x80000000);
        buffer[count++] = cpu_to_le32(0x80000000);

        for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                buffer[count++] =
                                        cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
                                buffer[count++] = cpu_to_le32(ext->reg_index -
                                                PACKET3_SET_CONTEXT_REG_START);
                                for (i = 0; i < ext->reg_count; i++)
                                        buffer[count++] = cpu_to_le32(ext->extent[i]);
                        } else {
                                return;
                        }
                }
        }

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
        buffer[count++] = cpu_to_le32(0);
}

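/*
 * Build the per-SE/SH always-on CU bitmaps: the first always_on_cu_num
 * enabled CUs of each shader array are marked always active for load
 * balancing, and (as the loop below suggests) the first
 * pg_always_on_cu_num of them are also written to the RLC powergating
 * always-on mask.
 */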
static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
{
        struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
        uint32_t pg_always_on_cu_num = 2;
        uint32_t always_on_cu_num;
        uint32_t i, j, k;
        uint32_t mask, cu_bitmap, counter;

        if (adev->flags & AMD_IS_APU)
                always_on_cu_num = 4;
        else if (adev->asic_type == CHIP_VEGA12)
                always_on_cu_num = 8;
        else
                always_on_cu_num = 12;

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
                        mask = 1;
                        cu_bitmap = 0;
                        counter = 0;
                        gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);

                        for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
                                if (cu_info->bitmap[i][j] & mask) {
                                        if (counter == pg_always_on_cu_num)
                                                WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
                                        if (counter < always_on_cu_num)
                                                cu_bitmap |= mask;
                                        else
                                                break;
                                        counter++;
                                }
                                mask <<= 1;
                        }

                        WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
                        cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
                }
        }
        gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        mutex_unlock(&adev->grbm_idx_mutex);
}

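/*
 * Raven flavour of the RLC load-balancing (LBPW) setup: program the
 * threshold configs, counters and CU masks; gfx_v9_4_init_lbpw() below
 * is the Vega20 variant with different thresholds.
 */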
static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
{
        uint32_t data;

        /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));

        /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
        WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

        /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
        WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);

        mutex_lock(&adev->grbm_idx_mutex);
        /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
        gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

        /* set mmRLC_LB_PARAMS = 0x003F_1006 */
        data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
        data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
        data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
        WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

        /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
        data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
        data &= 0x0000FFFF;
        data |= 0x00C00000;
        WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

        /*
         * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
         * programmed in gfx_v9_0_init_always_on_cu_mask()
         */

        /*
         * set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved but is
         * used here as part of the RLC_LB_CNTL configuration
         */
        data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
        data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
        data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
        WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
        mutex_unlock(&adev->grbm_idx_mutex);

        gfx_v9_0_init_always_on_cu_mask(adev);
}

static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
{
        uint32_t data;

        /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));

        /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
        WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

        /* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
        WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);

        mutex_lock(&adev->grbm_idx_mutex);
        /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
        gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

        /* set mmRLC_LB_PARAMS = 0x003F_1006 */
        data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
        data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
        data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
        WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

        /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
        data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
        data &= 0x0000FFFF;
        data |= 0x00C00000;
        WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

        /*
         * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
         * programmed in gfx_v9_0_init_always_on_cu_mask()
         */

        /*
         * set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved but is
         * used here as part of the RLC_LB_CNTL configuration
         */
        data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
        data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
        data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
        WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
        mutex_unlock(&adev->grbm_idx_mutex);

        gfx_v9_0_init_always_on_cu_mask(adev);
}

static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
{
        WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
}

static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
{
        return 5;
}

static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
{
        const struct cs_section_def *cs_data;
        int r;

        adev->gfx.rlc.cs_data = gfx9_cs_data;

        cs_data = adev->gfx.rlc.cs_data;

        if (cs_data) {
                /* init clear state block */
                r = amdgpu_gfx_rlc_init_csb(adev);
                if (r)
                        return r;
        }

        if (adev->asic_type == CHIP_RAVEN) {
                /* TODO: double check the cp_table_size for RV */
                adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
                r = amdgpu_gfx_rlc_init_cpt(adev);
                if (r)
                        return r;
        }

        switch (adev->asic_type) {
        case CHIP_RAVEN:
                gfx_v9_0_init_lbpw(adev);
                break;
        case CHIP_VEGA20:
                gfx_v9_4_init_lbpw(adev);
                break;
        default:
                break;
        }

        return 0;
}

1153 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1154 {
1155         int r;
1156
1157         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1158         if (unlikely(r != 0))
1159                 return r;
1160
1161         r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1162                         AMDGPU_GEM_DOMAIN_VRAM);
1163         if (!r)
1164                 adev->gfx.rlc.clear_state_gpu_addr =
1165                         amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1166
1167         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1168
1169         return r;
1170 }
1171
1172 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1173 {
1174         int r;
1175
1176         if (!adev->gfx.rlc.clear_state_obj)
1177                 return;
1178
1179         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1180         if (likely(r == 0)) {
1181                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1182                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1183         }
1184 }
1185
1186 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1187 {
1188         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1189         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1190 }
1191
1192 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1193 {
1194         int r;
1195         u32 *hpd;
1196         const __le32 *fw_data;
1197         unsigned fw_size;
1198         u32 *fw;
1199         size_t mec_hpd_size;
1200
1201         const struct gfx_firmware_header_v1_0 *mec_hdr;
1202
1203         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1204
1205         /* take ownership of the relevant compute queues */
1206         amdgpu_gfx_compute_queue_acquire(adev);
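        /* one GFX9_MEC_HPD_SIZE EOP buffer per enabled compute ring */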
1207         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1208
1209         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1210                                       AMDGPU_GEM_DOMAIN_VRAM,
1211                                       &adev->gfx.mec.hpd_eop_obj,
1212                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1213                                       (void **)&hpd);
1214         if (r) {
1215                 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1216                 gfx_v9_0_mec_fini(adev);
1217                 return r;
1218         }
1219
1220         memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1221
1222         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1223         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1224
1225         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1226
1227         fw_data = (const __le32 *)
1228                 (adev->gfx.mec_fw->data +
1229                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1230         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); /* bytes, used by the memcpy below */
1231
1232         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1233                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1234                                       &adev->gfx.mec.mec_fw_obj,
1235                                       &adev->gfx.mec.mec_fw_gpu_addr,
1236                                       (void **)&fw);
1237         if (r) {
1238                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1239                 gfx_v9_0_mec_fini(adev);
1240                 return r;
1241         }
1242
1243         memcpy(fw, fw_data, fw_size);
1244
1245         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1246         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1247
1248         return 0;
1249 }
1250
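/*
 * SQ registers are read indirectly: program SQ_IND_INDEX with the wave,
 * SIMD and register index (FORCE_READ so the read is not gated), then
 * read the value back through SQ_IND_DATA. wave_read_regs() below also
 * sets AUTO_INCR to stream a run of consecutive registers.
 */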
1251 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1252 {
1253         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1254                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1255                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1256                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1257                 (SQ_IND_INDEX__FORCE_READ_MASK));
1258         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1259 }
1260
1261 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1262                            uint32_t wave, uint32_t thread,
1263                            uint32_t regno, uint32_t num, uint32_t *out)
1264 {
1265         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1266                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1267                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1268                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1269                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1270                 (SQ_IND_INDEX__FORCE_READ_MASK) |
1271                 (SQ_IND_INDEX__AUTO_INCR_MASK));
1272         while (num--)
1273                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1274 }
1275
1276 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1277 {
1278         /* type 1 wave data */
1279         dst[(*no_fields)++] = 1;
1280         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1281         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1282         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1283         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1284         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1285         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1286         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1287         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1288         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1289         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1290         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1291         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1292         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1293         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1294 }
1295
1296 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1297                                      uint32_t wave, uint32_t start,
1298                                      uint32_t size, uint32_t *dst)
1299 {
1300         wave_read_regs(
1301                 adev, simd, wave, 0,
1302                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1303 }
1304
1305 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1306                                      uint32_t wave, uint32_t thread,
1307                                      uint32_t start, uint32_t size,
1308                                      uint32_t *dst)
1309 {
1310         wave_read_regs(
1311                 adev, simd, wave, thread,
1312                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1313 }
1314
1315 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1316                                   u32 me, u32 pipe, u32 q, u32 vm)
1317 {
1318         soc15_grbm_select(adev, me, pipe, q, vm);
1319 }
1320
1321 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1322         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1323         .select_se_sh = &gfx_v9_0_select_se_sh,
1324         .read_wave_data = &gfx_v9_0_read_wave_data,
1325         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1326         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1327         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
1328 };
1329
1330 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1331 {
1332         u32 gb_addr_config;
1333         int err;
1334
1335         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1336
1337         switch (adev->asic_type) {
1338         case CHIP_VEGA10:
1339                 adev->gfx.config.max_hw_contexts = 8;
1340                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1341                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1342                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1343                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1344                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1345                 break;
1346         case CHIP_VEGA12:
1347                 adev->gfx.config.max_hw_contexts = 8;
1348                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1349                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1350                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1351                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1352                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1353                 DRM_INFO("fix gfx.config for vega12\n");
1354                 break;
1355         case CHIP_VEGA20:
1356                 adev->gfx.config.max_hw_contexts = 8;
1357                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1358                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1359                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1360                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1361                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1362                 gb_addr_config &= ~0xf3e777ff;
1363                 gb_addr_config |= 0x22014042;
1364                 /* check vbios table if gpu info is not available */
1365                 err = amdgpu_atomfirmware_get_gfx_info(adev);
1366                 if (err)
1367                         return err;
1368                 break;
1369         case CHIP_RAVEN:
1370                 adev->gfx.config.max_hw_contexts = 8;
1371                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1372                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1373                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1374                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1375                 if (adev->rev_id >= 8)
1376                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1377                 else
1378                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1379                 break;
1380         default:
1381                 BUG();
1382                 break;
1383         }
1384
1385         adev->gfx.config.gb_addr_config = gb_addr_config;
1386
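        /* GB_ADDR_CONFIG packs these parameters as log2 values, hence 1 << field */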
1387         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1388                         REG_GET_FIELD(
1389                                         adev->gfx.config.gb_addr_config,
1390                                         GB_ADDR_CONFIG,
1391                                         NUM_PIPES);
1392
1393         adev->gfx.config.max_tile_pipes =
1394                 adev->gfx.config.gb_addr_config_fields.num_pipes;
1395
1396         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1397                         REG_GET_FIELD(
1398                                         adev->gfx.config.gb_addr_config,
1399                                         GB_ADDR_CONFIG,
1400                                         NUM_BANKS);
1401         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1402                         REG_GET_FIELD(
1403                                         adev->gfx.config.gb_addr_config,
1404                                         GB_ADDR_CONFIG,
1405                                         MAX_COMPRESSED_FRAGS);
1406         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1407                         REG_GET_FIELD(
1408                                         adev->gfx.config.gb_addr_config,
1409                                         GB_ADDR_CONFIG,
1410                                         NUM_RB_PER_SE);
1411         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1412                         REG_GET_FIELD(
1413                                         adev->gfx.config.gb_addr_config,
1414                                         GB_ADDR_CONFIG,
1415                                         NUM_SHADER_ENGINES);
1416         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1417                         REG_GET_FIELD(
1418                                         adev->gfx.config.gb_addr_config,
1419                                         GB_ADDR_CONFIG,
1420                                         PIPE_INTERLEAVE_SIZE));
1421
1422         return 0;
1423 }
1424
1425 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1426                                    struct amdgpu_ngg_buf *ngg_buf,
1427                                    int size_se,
1428                                    int default_size_se)
1429 {
1430         int r;
1431
1432         if (size_se < 0) {
1433                 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1434                 return -EINVAL;
1435         }
1436         size_se = size_se ? size_se : default_size_se;
1437
1438         ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1439         r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1440                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1441                                     &ngg_buf->bo,
1442                                     &ngg_buf->gpu_addr,
1443                                     NULL);
1444         if (r) {
1445                 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1446                 return r;
1447         }
1448         ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1449
1450         return r;
1451 }
1452
1453 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1454 {
1455         int i;
1456
1457         for (i = 0; i < NGG_BUF_MAX; i++)
1458                 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1459                                       &adev->gfx.ngg.buf[i].gpu_addr,
1460                                       NULL);
1461
1462         memset(&adev->gfx.ngg.buf[0], 0,
1463                         sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1464
1465         adev->gfx.ngg.init = false;
1466
1467         return 0;
1468 }
1469
1470 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
1471 {
1472         int r;
1473
1474         if (!amdgpu_ngg || adev->gfx.ngg.init)
1475                 return 0;
1476
1477         /* GDS reserved memory: 64-byte aligned */
1478         adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
1479         adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
1480         adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
1481         adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
1482
1483         /* Primitive Buffer */
1484         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
1485                                     amdgpu_prim_buf_per_se,
1486                                     64 * 1024);
1487         if (r) {
1488                 dev_err(adev->dev, "Failed to create Primitive Buffer\n");
1489                 goto err;
1490         }
1491
1492         /* Position Buffer */
1493         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
1494                                     amdgpu_pos_buf_per_se,
1495                                     256 * 1024);
1496         if (r) {
1497                 dev_err(adev->dev, "Failed to create Position Buffer\n");
1498                 goto err;
1499         }
1500
1501         /* Control Sideband */
1502         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
1503                                     amdgpu_cntl_sb_buf_per_se,
1504                                     256);
1505         if (r) {
1506                 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
1507                 goto err;
1508         }
1509
1510         /* Parameter Cache, not created by default */
1511         if (amdgpu_param_buf_per_se <= 0)
1512                 goto out;
1513
1514         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
1515                                     amdgpu_param_buf_per_se,
1516                                     512 * 1024);
1517         if (r) {
1518                 dev_err(adev->dev, "Failed to create Parameter Cache\n");
1519                 goto err;
1520         }
1521
1522 out:
1523         adev->gfx.ngg.init = true;
1524         return 0;
1525 err:
1526         gfx_v9_0_ngg_fini(adev);
1527         return r;
1528 }
1529
1530 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
1531 {
1532         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
1533         int r;
1534         u32 data, base;
1535
1536         if (!amdgpu_ngg)
1537                 return 0;
1538
1539         /* Program buffer size */
1540         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
1541                              adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
1542         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
1543                              adev->gfx.ngg.buf[NGG_POS].size >> 8);
1544         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
1545
1546         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
1547                              adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
1548         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
1549                              adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
1550         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
1551
1552         /* Program buffer base address */
1553         base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1554         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
1555         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
1556
1557         base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1558         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
1559         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
1560
1561         base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1562         data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
1563         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
1564
1565         base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1566         data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
1567         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
1568
1569         base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1570         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
1571         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
1572
1573         base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1574         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
1575         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
1576
1577         /* Clear GDS reserved memory */
1578         r = amdgpu_ring_alloc(ring, 17);
1579         if (r) {
1580                 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
1581                           ring->name, r);
1582                 return r;
1583         }
1584
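        /*
         * Temporarily extend the VMID0 GDS aperture over the reserved range,
         * DMA zeros into it (DST_SEL(1) = GDS, SRC_SEL(2) = inline data),
         * then write the size back to 0; the per-VMID GDS sizes are
         * presumably programmed again later.
         */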
1585         gfx_v9_0_write_data_to_reg(ring, 0, false,
1586                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
1587                                    (adev->gds.gds_size +
1588                                     adev->gfx.ngg.gds_reserve_size));
1589
1590         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
1591         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
1592                                 PACKET3_DMA_DATA_DST_SEL(1) |
1593                                 PACKET3_DMA_DATA_SRC_SEL(2)));
1594         amdgpu_ring_write(ring, 0);
1595         amdgpu_ring_write(ring, 0);
1596         amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
1597         amdgpu_ring_write(ring, 0);
1598         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
1599                                 adev->gfx.ngg.gds_reserve_size);
1600
1601         gfx_v9_0_write_data_to_reg(ring, 0, false,
1602                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
1603
1604         amdgpu_ring_commit(ring);
1605
1606         return 0;
1607 }
1608
1609 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1610                                       int mec, int pipe, int queue)
1611 {
1612         int r;
1613         unsigned irq_type;
1614         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1617
1618         /* mec0 is me1 */
1619         ring->me = mec + 1;
1620         ring->pipe = pipe;
1621         ring->queue = queue;
1622
1623         ring->ring_obj = NULL;
1624         ring->use_doorbell = true;
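        /* SOC15 doorbell assignments are 64-bit slots; the ring expects 32-bit
         * dword units, hence the << 1 (DOORBELL64 layout)
         */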
1625         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1626         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1627                                 + (ring_id * GFX9_MEC_HPD_SIZE);
1628         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1629
1630         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1631                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1632                 + ring->pipe;
1633
1634         /* type-2 packets are deprecated on MEC, use type-3 instead */
1635         r = amdgpu_ring_init(adev, ring, 1024,
1636                              &adev->gfx.eop_irq, irq_type);
1637         if (r)
1638                 return r;
1639
1641         return 0;
1642 }
1643
1644 static int gfx_v9_0_sw_init(void *handle)
1645 {
1646         int i, j, k, r, ring_id;
1647         struct amdgpu_ring *ring;
1648         struct amdgpu_kiq *kiq;
1649         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1650
1651         switch (adev->asic_type) {
1652         case CHIP_VEGA10:
1653         case CHIP_VEGA12:
1654         case CHIP_VEGA20:
1655         case CHIP_RAVEN:
1656                 adev->gfx.mec.num_mec = 2;
1657                 break;
1658         default:
1659                 adev->gfx.mec.num_mec = 1;
1660                 break;
1661         }
1662
1663         adev->gfx.mec.num_pipe_per_mec = 4;
1664         adev->gfx.mec.num_queue_per_pipe = 8;
1665
1666         /* EOP Event */
1667         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
1668         if (r)
1669                 return r;
1670
1671         /* Privileged reg */
1672         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
1673                               &adev->gfx.priv_reg_irq);
1674         if (r)
1675                 return r;
1676
1677         /* Privileged inst */
1678         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
1679                               &adev->gfx.priv_inst_irq);
1680         if (r)
1681                 return r;
1682
1683         /* ECC error */
1684         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
1685                               &adev->gfx.cp_ecc_error_irq);
1686         if (r)
1687                 return r;
1688
1689         /* FUE error */
1690         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
1691                               &adev->gfx.cp_ecc_error_irq);
1692         if (r)
1693                 return r;
1694
1695         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1696
1697         gfx_v9_0_scratch_init(adev);
1698
1699         r = gfx_v9_0_init_microcode(adev);
1700         if (r) {
1701                 DRM_ERROR("Failed to load gfx firmware!\n");
1702                 return r;
1703         }
1704
1705         r = adev->gfx.rlc.funcs->init(adev);
1706         if (r) {
1707                 DRM_ERROR("Failed to init rlc BOs!\n");
1708                 return r;
1709         }
1710
1711         r = gfx_v9_0_mec_init(adev);
1712         if (r) {
1713                 DRM_ERROR("Failed to init MEC BOs!\n");
1714                 return r;
1715         }
1716
1717         /* set up the gfx ring */
1718         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1719                 ring = &adev->gfx.gfx_ring[i];
1720                 ring->ring_obj = NULL;
1721                 if (!i)
1722                         sprintf(ring->name, "gfx");
1723                 else
1724                         sprintf(ring->name, "gfx_%d", i);
1725                 ring->use_doorbell = true;
1726                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
1727                 r = amdgpu_ring_init(adev, ring, 1024,
1728                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
1729                 if (r)
1730                         return r;
1731         }
1732
1733         /* set up the compute queues - allocate horizontally across pipes */
1734         ring_id = 0;
1735         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1736                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1737                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1738                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
1739                                         continue;
1740
1741                                 r = gfx_v9_0_compute_ring_init(adev,
1742                                                                ring_id,
1743                                                                i, k, j);
1744                                 if (r)
1745                                         return r;
1746
1747                                 ring_id++;
1748                         }
1749                 }
1750         }
1751
1752         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
1753         if (r) {
1754                 DRM_ERROR("Failed to init KIQ BOs!\n");
1755                 return r;
1756         }
1757
1758         kiq = &adev->gfx.kiq;
1759         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1760         if (r)
1761                 return r;
1762
1763         /* create MQD for all compute queues as well as KIQ for SRIOV case */
1764         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
1765         if (r)
1766                 return r;
1767
1768         adev->gfx.ce_ram_size = 0x8000;
1769
1770         r = gfx_v9_0_gpu_early_init(adev);
1771         if (r)
1772                 return r;
1773
1774         r = gfx_v9_0_ngg_init(adev);
1775         if (r)
1776                 return r;
1777
1778         return 0;
1779 }
1780
1782 static int gfx_v9_0_sw_fini(void *handle)
1783 {
1784         int i;
1785         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1786
1787         if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
1788                         adev->gfx.ras_if) {
1789                 struct ras_common_if *ras_if = adev->gfx.ras_if;
1790                 struct ras_ih_if ih_info = {
1791                         .head = *ras_if,
1792                 };
1793
1794                 amdgpu_ras_debugfs_remove(adev, ras_if);
1795                 amdgpu_ras_sysfs_remove(adev, ras_if);
1796                 amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
1797                 amdgpu_ras_feature_enable(adev, ras_if, 0);
1798                 kfree(ras_if);
1799         }
1800
1801         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1802                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1803         for (i = 0; i < adev->gfx.num_compute_rings; i++)
1804                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1805
1806         amdgpu_gfx_mqd_sw_fini(adev);
1807         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
1808         amdgpu_gfx_kiq_fini(adev);
1809
1810         gfx_v9_0_mec_fini(adev);
1811         gfx_v9_0_ngg_fini(adev);
1812         amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1813         if (adev->asic_type == CHIP_RAVEN) {
1814                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
1815                                 &adev->gfx.rlc.cp_table_gpu_addr,
1816                                 (void **)&adev->gfx.rlc.cp_table_ptr);
1817         }
1818         gfx_v9_0_free_microcode(adev);
1819
1820         return 0;
1821 }
1822
1824 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
1825 {
1826         /* TODO */
1827 }
1828
1829 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
1830 {
1831         u32 data;
1832
1833         if (instance == 0xffffffff)
1834                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1835         else
1836                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
1837
1838         if (se_num == 0xffffffff)
1839                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
1840         else
1841                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1842
1843         if (sh_num == 0xffffffff)
1844                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
1845         else
1846                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
1847
1848         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
1849 }
1850
1851 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1852 {
1853         u32 data, mask;
1854
1855         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
1856         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
1857
1858         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1859         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1860
1861         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1862                                          adev->gfx.config.max_sh_per_se);
1863
1864         return (~data) & mask;
1865 }
1866
1867 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
1868 {
1869         int i, j;
1870         u32 data;
1871         u32 active_rbs = 0;
1872         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1873                                         adev->gfx.config.max_sh_per_se;
1874
1875         mutex_lock(&adev->grbm_idx_mutex);
1876         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1877                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1878                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1879                         data = gfx_v9_0_get_rb_active_bitmap(adev);
1880                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1881                                                rb_bitmap_width_per_sh);
1882                 }
1883         }
1884         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1885         mutex_unlock(&adev->grbm_idx_mutex);
1886
1887         adev->gfx.config.backend_enable_mask = active_rbs;
1888         adev->gfx.config.num_rbs = hweight32(active_rbs);
1889 }
1890
1891 #define DEFAULT_SH_MEM_BASES    (0x6000)
1892 #define FIRST_COMPUTE_VMID      (8)
1893 #define LAST_COMPUTE_VMID       (16)
1894 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
1895 {
1896         int i;
1897         uint32_t sh_mem_config;
1898         uint32_t sh_mem_bases;
1899
1900         /*
1901          * Configure apertures:
1902          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1903          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1904          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (256TB)
1905          */
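        /* SH_MEM_BASES holds bits 63:48 of the private (low 16 bits) and
         * shared (high 16 bits) aperture bases, so 0x6000 places both
         * apertures in the 0x6000'0000'0000'0000 range shown above
         */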
1906         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
1907
1908         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
1909                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
1910                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
1911
1912         mutex_lock(&adev->srbm_mutex);
1913         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1914                 soc15_grbm_select(adev, 0, 0, 0, i);
1915                 /* CP and shaders */
1916                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
1917                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
1918         }
1919         soc15_grbm_select(adev, 0, 0, 0, 0);
1920         mutex_unlock(&adev->srbm_mutex);
1921
1922         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
1923          * access. These should be enabled by FW for target VMIDs. */
1924         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1925                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
1926                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
1927                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
1928                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
1929         }
1930 }
1931
1932 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
1933 {
1934         u32 tmp;
1935         int i;
1936
1937         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1938
1939         gfx_v9_0_tiling_mode_table_init(adev);
1940
1941         gfx_v9_0_setup_rb(adev);
1942         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
1943         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
1944
1945         /* XXX SH_MEM regs */
1946         /* where to put LDS, scratch, GPUVM in FSA64 space */
1947         mutex_lock(&adev->srbm_mutex);
1948         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
1949                 soc15_grbm_select(adev, 0, 0, 0, i);
1950                 /* CP and shaders */
1951                 if (i == 0) {
1952                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1953                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1954                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
1955                                             !!amdgpu_noretry);
1956                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1957                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
1958                 } else {
1959                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1960                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1961                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
1962                                             !!amdgpu_noretry);
1963                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1964                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1965                                 (adev->gmc.private_aperture_start >> 48));
1966                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1967                                 (adev->gmc.shared_aperture_start >> 48));
1968                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
1969                 }
1970         }
1971         soc15_grbm_select(adev, 0, 0, 0, 0);
1972
1973         mutex_unlock(&adev->srbm_mutex);
1974
1975         gfx_v9_0_init_compute_vmid(adev);
1976 }
1977
1978 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
1979 {
1980         u32 i, j, k;
1981         u32 mask;
1982
1983         mutex_lock(&adev->grbm_idx_mutex);
1984         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1985                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1986                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1987                         for (k = 0; k < adev->usec_timeout; k++) {
1988                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
1989                                         break;
1990                                 udelay(1);
1991                         }
1992                         if (k == adev->usec_timeout) {
1993                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
1994                                                       0xffffffff, 0xffffffff);
1995                                 mutex_unlock(&adev->grbm_idx_mutex);
1996                                 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
1997                                          i, j);
1998                                 return;
1999                         }
2000                 }
2001         }
2002         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2003         mutex_unlock(&adev->grbm_idx_mutex);
2004
2005         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2006                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2007                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2008                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2009         for (k = 0; k < adev->usec_timeout; k++) {
2010                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2011                         break;
2012                 udelay(1);
2013         }
2014 }
2015
2016 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2017                                                bool enable)
2018 {
2019         u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2020
2021         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2022         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2023         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2024         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2025
2026         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2027 }
2028
2029 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2030 {
2031         /* program the clear state indirect buffer (CSIB) address and size */
2032         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2033                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2034         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2035                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2036         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2037                         adev->gfx.rlc.clear_state_size);
2038 }
2039
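/*
 * Walk the RLC register_list_format blob past its direct-register prefix:
 * record where each indirect block starts and collect the unique indirect
 * index registers those blocks reference. A block ends at an 0xFFFFFFFF
 * marker.
 */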
2040 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2041                                 int indirect_offset,
2042                                 int list_size,
2043                                 int *unique_indirect_regs,
2044                                 int unique_indirect_reg_count,
2045                                 int *indirect_start_offsets,
2046                                 int *indirect_start_offsets_count,
2047                                 int max_start_offsets_count)
2048 {
2049         int idx;
2050
2051         for (; indirect_offset < list_size; indirect_offset++) {
2052                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2053                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2054                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2055
2056                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2057                         indirect_offset += 2;
2058
2059                         /* look for the matching index */
2060                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2061                                 if (unique_indirect_regs[idx] ==
2062                                         register_list_format[indirect_offset] ||
2063                                         !unique_indirect_regs[idx])
2064                                         break;
2065                         }
2066
2067                         BUG_ON(idx >= unique_indirect_reg_count);
2068
2069                         if (!unique_indirect_regs[idx])
2070                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2071
2072                         indirect_offset++;
2073                 }
2074         }
2075 }
2076
2077 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2078 {
2079         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2080         int unique_indirect_reg_count = 0;
2081
2082         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2083         int indirect_start_offsets_count = 0;
2084
2085         int list_size = 0;
2086         int i = 0, j = 0;
2087         u32 tmp = 0;
2088
2089         u32 *register_list_format =
2090                 kmemdup(adev->gfx.rlc.register_list_format,
2091                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2092         if (!register_list_format)
2093                 return -ENOMEM;
2094
2095         /* setup unique_indirect_regs array and indirect_start_offsets array */
2096         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2097         gfx_v9_1_parse_ind_reg_list(register_list_format,
2098                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2099                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2100                                     unique_indirect_regs,
2101                                     unique_indirect_reg_count,
2102                                     indirect_start_offsets,
2103                                     &indirect_start_offsets_count,
2104                                     ARRAY_SIZE(indirect_start_offsets));
2105
2106         /* enable auto inc in case it is disabled */
2107         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2108         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2109         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2110
2111         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2112         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2113                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2114         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2115                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2116                         adev->gfx.rlc.register_restore[i]);
2117
2118         /* load indirect register */
2119         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2120                 adev->gfx.rlc.reg_list_format_start);
2121
2122         /* direct register portion */
2123         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2124                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2125                         register_list_format[i]);
2126
2127         /* indirect register portion */
2128         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2129                 if (register_list_format[i] == 0xFFFFFFFF) {
2130                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2131                         continue;
2132                 }
2133
2134                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2135                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2136
2137                 for (j = 0; j < unique_indirect_reg_count; j++) {
2138                         if (register_list_format[i] == unique_indirect_regs[j]) {
2139                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2140                                 break;
2141                         }
2142                 }
2143
2144                 BUG_ON(j >= unique_indirect_reg_count);
2145
2146                 i++;
2147         }
2148
2149         /* set save/restore list size: the list holds (reg, value) pairs, so halve the dword count */
2150         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2151         list_size = list_size >> 1;
2152         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2153                 adev->gfx.rlc.reg_restore_list_size);
2154         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2155
2156         /* write the starting offsets to RLC scratch ram */
2157         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2158                 adev->gfx.rlc.starting_offsets_start);
2159         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2160                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2161                        indirect_start_offsets[i]);
2162
2163         /* load unique indirect regs*/
2164         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2165                 if (unique_indirect_regs[i] != 0) {
2166                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2167                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2168                                unique_indirect_regs[i] & 0x3FFFF);
2169
2170                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2171                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2172                                unique_indirect_regs[i] >> 20);
2173                 }
2174         }
2175
2176         kfree(register_list_format);
2177         return 0;
2178 }
2179
2180 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2181 {
2182         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2183 }
2184
2185 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2186                                              bool enable)
2187 {
2188         uint32_t data = 0;
2189         uint32_t default_data = 0;
2190
2191         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2192         if (enable) {
2193                 /* enable GFXIP control over CGPG */
2194                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2195                 if (default_data != data)
2196                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2197
2198                 /* update status */
2199                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2200                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2201                 if (default_data != data)
2202                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2203         } else {
2204                 /* restore GFXIP control over CGPG */
2205                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2206                 if (default_data != data)
2207                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2208         }
2209 }
2210
2211 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2212 {
2213         uint32_t data = 0;
2214
2215         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2216                               AMD_PG_SUPPORT_GFX_SMG |
2217                               AMD_PG_SUPPORT_GFX_DMG)) {
2218                 /* init IDLE_POLL_COUNT = 60 */
2219                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2220                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2221                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2222                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2223
2224                 /* init RLC PG Delay */
2225                 data = 0;
2226                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2227                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2228                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2229                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2230                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2231
2232                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2233                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2234                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2235                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2236
2237                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2238                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2239                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2240                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2241
2242                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2243                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2244
2245                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2246                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2247                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2248
2249                 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2250         }
2251 }
2252
2253 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2254                                                 bool enable)
2255 {
2256         uint32_t data = 0;
2257         uint32_t default_data = 0;
2258
2259         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2260         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2261                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2262                              enable ? 1 : 0);
2263         if (default_data != data)
2264                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2265 }
2266
2267 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2268                                                 bool enable)
2269 {
2270         uint32_t data = 0;
2271         uint32_t default_data = 0;
2272
2273         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2274         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2275                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2276                              enable ? 1 : 0);
2277         if (default_data != data)
2278                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2279 }
2280
2281 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2282                                         bool enable)
2283 {
2284         uint32_t data = 0;
2285         uint32_t default_data = 0;
2286
2287         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2288         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2289                              CP_PG_DISABLE,
2290                              enable ? 0 : 1);
2291         if (default_data != data)
2292                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2293 }
2294
2295 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2296                                                 bool enable)
2297 {
2298         uint32_t data, default_data;
2299
2300         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2301         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2302                              GFX_POWER_GATING_ENABLE,
2303                              enable ? 1 : 0);
2304         if (default_data != data)
2305                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2306 }
2307
2308 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2309                                                 bool enable)
2310 {
2311         uint32_t data, default_data;
2312
2313         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2314         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2315                              GFX_PIPELINE_PG_ENABLE,
2316                              enable ? 1 : 0);
2317         if (default_data != data)
2318                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2319
2320         if (!enable)
2321                 /* read any GFX register to wake up GFX */
2322                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2323 }
2324
2325 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2326                                                        bool enable)
2327 {
2328         uint32_t data, default_data;
2329
2330         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2331         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2332                              STATIC_PER_CU_PG_ENABLE,
2333                              enable ? 1 : 0);
2334         if (default_data != data)
2335                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2336 }
2337
2338 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2339                                                 bool enable)
2340 {
2341         uint32_t data, default_data;
2342
2343         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2344         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2345                              DYN_PER_CU_PG_ENABLE,
2346                              enable ? 1 : 0);
2347         if (default_data != data)
2348                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2349 }
2350
2351 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2352 {
2353         gfx_v9_0_init_csb(adev);
2354
2355         /*
2356          * The RLC save/restore list is supported from RLC v2_1 onward
2357          * and is required by the gfxoff feature.
2358          */
2359         if (adev->gfx.rlc.is_rlc_v2_1) {
2360                 gfx_v9_1_init_rlc_save_restore_list(adev);
2361                 gfx_v9_0_enable_save_restore_machine(adev);
2362         }
2363
2364         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2365                               AMD_PG_SUPPORT_GFX_SMG |
2366                               AMD_PG_SUPPORT_GFX_DMG |
2367                               AMD_PG_SUPPORT_CP |
2368                               AMD_PG_SUPPORT_GDS |
2369                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
2370                 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2371                        adev->gfx.rlc.cp_table_gpu_addr >> 8);
2372                 gfx_v9_0_init_gfx_power_gating(adev);
2373         }
2374 }
2375
2376 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2377 {
2378         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2379         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2380         gfx_v9_0_wait_for_rlc_serdes(adev);
2381 }
2382
2383 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2384 {
2385         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2386         udelay(50);
2387         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2388         udelay(50);
2389 }
2390
2391 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2392 {
2393 #ifdef AMDGPU_RLC_DEBUG_RETRY
2394         u32 rlc_ucode_ver;
2395 #endif
2396
2397         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2398         udelay(50);
2399
2400         /* APUs (e.g. Carrizo) enable the CP interrupt only after the CP is initialized */
2401         if (!(adev->flags & AMD_IS_APU)) {
2402                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2403                 udelay(50);
2404         }
2405
2406 #ifdef AMDGPU_RLC_DEBUG_RETRY
2407         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2408         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2409         if (rlc_ucode_ver == 0x108) {
2410                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2411                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2412                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2413                  * default is 0x9C4 to create a 100us interval */
2414                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2415                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2416                  * to disable the page fault retry interrupts, default is
2417                  * 0x100 (256) */
2418                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2419         }
2420 #endif
2421 }
2422
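/* legacy (non-PSP) RLC ucode load: stream the firmware words through
 * the RLC_GPM_UCODE ADDR/DATA register pair */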
2423 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2424 {
2425         const struct rlc_firmware_header_v2_0 *hdr;
2426         const __le32 *fw_data;
2427         unsigned i, fw_size;
2428
2429         if (!adev->gfx.rlc_fw)
2430                 return -EINVAL;
2431
2432         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2433         amdgpu_ucode_print_rlc_hdr(&hdr->header);
2434
2435         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2436                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2437         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2438
2439         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2440                         RLCG_UCODE_LOADING_START_ADDRESS);
2441         for (i = 0; i < fw_size; i++)
2442                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2443         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2444
2445         return 0;
2446 }
2447
2448 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2449 {
2450         int r;
2451
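        /* under SR-IOV the host owns RLC bring-up; the guest only
         * programs the clear-state buffer */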
2452         if (amdgpu_sriov_vf(adev)) {
2453                 gfx_v9_0_init_csb(adev);
2454                 return 0;
2455         }
2456
2457         adev->gfx.rlc.funcs->stop(adev);
2458
2459         /* disable CG */
2460         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2461
2462         gfx_v9_0_init_pg(adev);
2463
2464         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2465                 /* legacy rlc firmware loading */
2466                 r = gfx_v9_0_rlc_load_microcode(adev);
2467                 if (r)
2468                         return r;
2469         }
2470
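        /*
         * Load Balancing Per Watt (LBPW) is only wired up on Raven and
         * Vega20.  Note the differing defaults when the amdgpu_lbpw
         * parameter is left at -1 (auto): enabled on Raven, disabled
         * on Vega20.
         */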
2471         switch (adev->asic_type) {
2472         case CHIP_RAVEN:
2473                 if (amdgpu_lbpw == 0)
2474                         gfx_v9_0_enable_lbpw(adev, false);
2475                 else
2476                         gfx_v9_0_enable_lbpw(adev, true);
2477                 break;
2478         case CHIP_VEGA20:
2479                 if (amdgpu_lbpw > 0)
2480                         gfx_v9_0_enable_lbpw(adev, true);
2481                 else
2482                         gfx_v9_0_enable_lbpw(adev, false);
2483                 break;
2484         default:
2485                 break;
2486         }
2487
2488         adev->gfx.rlc.funcs->start(adev);
2489
2490         return 0;
2491 }
2492
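/* halt or un-halt the gfx CP micro engines (PFP, CE, ME); when
 * halting, also mark the gfx rings as no longer schedulable */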
2493 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2494 {
2495         int i;
2496         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2497
2498         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2499         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2500         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2501         if (!enable) {
2502                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2503                         adev->gfx.gfx_ring[i].sched.ready = false;
2504         }
2505         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2506         udelay(50);
2507 }
2508
2509 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2510 {
2511         const struct gfx_firmware_header_v1_0 *pfp_hdr;
2512         const struct gfx_firmware_header_v1_0 *ce_hdr;
2513         const struct gfx_firmware_header_v1_0 *me_hdr;
2514         const __le32 *fw_data;
2515         unsigned i, fw_size;
2516
2517         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2518                 return -EINVAL;
2519
2520         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2521                 adev->gfx.pfp_fw->data;
2522         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2523                 adev->gfx.ce_fw->data;
2524         me_hdr = (const struct gfx_firmware_header_v1_0 *)
2525                 adev->gfx.me_fw->data;
2526
2527         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2528         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2529         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2530
2531         gfx_v9_0_cp_gfx_enable(adev, false);
2532
2533         /* PFP */
2534         fw_data = (const __le32 *)
2535                 (adev->gfx.pfp_fw->data +
2536                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2537         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2538         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2539         for (i = 0; i < fw_size; i++)
2540                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2541         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2542
2543         /* CE */
2544         fw_data = (const __le32 *)
2545                 (adev->gfx.ce_fw->data +
2546                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2547         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2548         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
2549         for (i = 0; i < fw_size; i++)
2550                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2551         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2552
2553         /* ME */
2554         fw_data = (const __le32 *)
2555                 (adev->gfx.me_fw->data +
2556                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2557         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2558         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
2559         for (i = 0; i < fw_size; i++)
2560                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2561         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2562
2563         return 0;
2564 }
2565
2566 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2567 {
2568         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2569         const struct cs_section_def *sect = NULL;
2570         const struct cs_extent_def *ext = NULL;
2571         int r, i, tmp;
2572
2573         /* init the CP */
2574         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2575         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2576
2577         gfx_v9_0_cp_gfx_enable(adev, true);
2578
2579         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
2580         if (r) {
2581                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2582                 return r;
2583         }
2584
2585         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2586         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2587
2588         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2589         amdgpu_ring_write(ring, 0x80000000);
2590         amdgpu_ring_write(ring, 0x80000000);
2591
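        /* replay the golden context: emit every SECT_CONTEXT extent of
         * the gfx9 clear-state data as SET_CONTEXT_REG packets */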
2592         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2593                 for (ext = sect->section; ext->extent != NULL; ++ext) {
2594                         if (sect->id == SECT_CONTEXT) {
2595                                 amdgpu_ring_write(ring,
2596                                        PACKET3(PACKET3_SET_CONTEXT_REG,
2597                                                ext->reg_count));
2598                                 amdgpu_ring_write(ring,
2599                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2600                                 for (i = 0; i < ext->reg_count; i++)
2601                                         amdgpu_ring_write(ring, ext->extent[i]);
2602                         }
2603                 }
2604         }
2605
2606         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2607         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2608
2609         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2610         amdgpu_ring_write(ring, 0);
2611
2612         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2613         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2614         amdgpu_ring_write(ring, 0x8000);
2615         amdgpu_ring_write(ring, 0x8000);
2616
2617         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2618         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
2619                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
2620         amdgpu_ring_write(ring, tmp);
2621         amdgpu_ring_write(ring, 0);
2622
2623         amdgpu_ring_commit(ring);
2624
2625         return 0;
2626 }
2627
2628 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
2629 {
2630         struct amdgpu_ring *ring;
2631         u32 tmp;
2632         u32 rb_bufsz;
2633         u64 rb_addr, rptr_addr, wptr_gpu_addr;
2634
2635         /* Set the write pointer delay */
2636         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
2637
2638         /* set the RB to use vmid 0 */
2639         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
2640
2641         /* Set ring buffer size */
2642         ring = &adev->gfx.gfx_ring[0];
2643         rb_bufsz = order_base_2(ring->ring_size / 8);
2644         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2645         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
2646 #ifdef __BIG_ENDIAN
2647         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
2648 #endif
2649         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2650
2651         /* Initialize the ring buffer's write pointers */
2652         ring->wptr = 0;
2653         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2654         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
2655
2656         /* set the wb address whether it's enabled or not */
2657         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2658         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2659         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
2660
2661         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2662         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
2663         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
2664
2665         mdelay(1);
2666         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2667
2668         rb_addr = ring->gpu_addr >> 8;
2669         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
2670         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2671
2672         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
2673         if (ring->use_doorbell) {
2674                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2675                                     DOORBELL_OFFSET, ring->doorbell_index);
2676                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2677                                     DOORBELL_EN, 1);
2678         } else {
2679                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
2680         }
2681         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
2682
2683         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
2684                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
2685         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
2686
2687         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
2688                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
2689
2691         /* start the ring */
2692         gfx_v9_0_cp_gfx_start(adev);
2693         ring->sched.ready = true;
2694
2695         return 0;
2696 }
2697
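/* halt or un-halt both MEC micro engines; halting also marks every
 * KCQ and the KIQ as no longer schedulable */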
2698 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2699 {
2700         int i;
2701
2702         if (enable) {
2703                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
2704         } else {
2705                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
2706                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2707                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2708                         adev->gfx.compute_ring[i].sched.ready = false;
2709                 adev->gfx.kiq.ring.sched.ready = false;
2710         }
2711         udelay(50);
2712 }
2713
2714 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
2715 {
2716         const struct gfx_firmware_header_v1_0 *mec_hdr;
2717         const __le32 *fw_data;
2718         unsigned i;
2719         u32 tmp;
2720
2721         if (!adev->gfx.mec_fw)
2722                 return -EINVAL;
2723
2724         gfx_v9_0_cp_compute_enable(adev, false);
2725
2726         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2727         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2728
2729         fw_data = (const __le32 *)
2730                 (adev->gfx.mec_fw->data +
2731                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2732         tmp = 0;
2733         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2734         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2735         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
2736
2737         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
2738                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
2739         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
2740                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
2741
2742         /* MEC1 */
2743         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2744                          mec_hdr->jt_offset);
2745         for (i = 0; i < mec_hdr->jt_size; i++)
2746                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
2747                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
2748
2749         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2750                         adev->gfx.mec_fw_version);
2751         /* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
2752
2753         return 0;
2754 }
2755
2756 /* KIQ functions */
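/* The Kernel Interface Queue (KIQ) is a privileged compute queue through
 * which the driver asks the CP to map and unmap the other compute queues. */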
2757 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
2758 {
2759         uint32_t tmp;
2760         struct amdgpu_device *adev = ring->adev;
2761
2762         /* tell RLC which is KIQ queue */
2763         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
2764         tmp &= 0xffffff00;
2765         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
2766         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2767         tmp |= 0x80;
2768         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2769 }
2770
2771 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
2772 {
2773         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
2774         uint64_t queue_mask = 0;
2775         int r, i;
2776
2777         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
2778                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
2779                         continue;
2780
2781                 /* This situation may be hit in the future if a new HW
2782                  * generation exposes more than 64 queues. If so, the
2783                  * definition of queue_mask needs updating */
2784                 if (WARN_ON(i >= (sizeof(queue_mask) * 8))) {
2785                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
2786                         break;
2787                 }
2788
2789                 queue_mask |= (1ull << i);
2790         }
2791
2792         r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
2793         if (r) {
2794                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
2795                 return r;
2796         }
2797
2798         /* set resources */
2799         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
2800         amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
2801                           PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
2802         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
2803         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
2804         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
2805         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
2806         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
2807         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
2808         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2809                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2810                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
2811                 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2812
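                /* one MAP_QUEUES packet per KCQ; ring->me is 1-based
                 * (MEC1/MEC2) while the packet's ME field is 0-based */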
2813                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
2814                 /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
2815                 amdgpu_ring_write(kiq_ring,
2816                                   PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
2817                                   PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
2818                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
2819                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
2820                                   PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
2821                                   PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
2822                                   PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
2823                                   PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
2824                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
2825                 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
2826                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
2827                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
2828                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
2829                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
2830         }
2831
2832         r = amdgpu_ring_test_helper(kiq_ring);
2833         if (r)
2834                 DRM_ERROR("KCQ enable failed\n");
2835
2836         return r;
2837 }
2838
2839 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
2840 {
2841         struct amdgpu_device *adev = ring->adev;
2842         struct v9_mqd *mqd = ring->mqd_ptr;
2843         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
2844         uint32_t tmp;
2845
2846         mqd->header = 0xC0310800;
2847         mqd->compute_pipelinestat_enable = 0x00000001;
2848         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2849         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2850         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2851         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2852         mqd->compute_misc_reserved = 0x00000003;
2853
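        /* the dynamic CU mask lives in the same BO as the MQD itself
         * (see struct v9_mqd_allocation); tell the CP where to find it */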
2854         mqd->dynamic_cu_mask_addr_lo =
2855                 lower_32_bits(ring->mqd_gpu_addr
2856                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2857         mqd->dynamic_cu_mask_addr_hi =
2858                 upper_32_bits(ring->mqd_gpu_addr
2859                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2860
2861         eop_base_addr = ring->eop_gpu_addr >> 8;
2862         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
2863         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
2864
2865         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2866         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
2867         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
2868                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
2869
2870         mqd->cp_hqd_eop_control = tmp;
2871
2872         /* enable doorbell? */
2873         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2874
2875         if (ring->use_doorbell) {
2876                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2877                                     DOORBELL_OFFSET, ring->doorbell_index);
2878                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2879                                     DOORBELL_EN, 1);
2880                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2881                                     DOORBELL_SOURCE, 0);
2882                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2883                                     DOORBELL_HIT, 0);
2884         } else {
2885                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2886                                          DOORBELL_EN, 0);
2887         }
2888
2889         mqd->cp_hqd_pq_doorbell_control = tmp;
2890
2891         /* disable the queue if it's active */
2892         ring->wptr = 0;
2893         mqd->cp_hqd_dequeue_request = 0;
2894         mqd->cp_hqd_pq_rptr = 0;
2895         mqd->cp_hqd_pq_wptr_lo = 0;
2896         mqd->cp_hqd_pq_wptr_hi = 0;
2897
2898         /* set the pointer to the MQD */
2899         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
2900         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
2901
2902         /* set MQD vmid to 0 */
2903         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
2904         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
2905         mqd->cp_mqd_control = tmp;
2906
2907         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
2908         hqd_gpu_addr = ring->gpu_addr >> 8;
2909         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
2910         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2911
2912         /* set up the HQD, this is similar to CP_RB0_CNTL */
2913         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
2914         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
2915                             (order_base_2(ring->ring_size / 4) - 1));
2916         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
2917                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
2918 #ifdef __BIG_ENDIAN
2919         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
2920 #endif
2921         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
2922         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
2923         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
2924         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
2925         mqd->cp_hqd_pq_control = tmp;
2926
2927         /* set the wb address whether it's enabled or not */
2928         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2929         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
2930         mqd->cp_hqd_pq_rptr_report_addr_hi =
2931                 upper_32_bits(wb_gpu_addr) & 0xffff;
2932
2933         /* only used if CP_PQ_WPTR_POLL_CNTL.EN == 1 */
2934         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2935         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2936         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2937
2938         tmp = 0;
2939         /* enable the doorbell if requested */
2940         if (ring->use_doorbell) {
2941                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2942                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2943                                 DOORBELL_OFFSET, ring->doorbell_index);
2944
2945                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2946                                          DOORBELL_EN, 1);
2947                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2948                                          DOORBELL_SOURCE, 0);
2949                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2950                                          DOORBELL_HIT, 0);
2951         }
2952
2953         mqd->cp_hqd_pq_doorbell_control = tmp;
2954
2955         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2956         ring->wptr = 0;
2957         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
2958
2959         /* set the vmid for the queue */
2960         mqd->cp_hqd_vmid = 0;
2961
2962         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
2963         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
2964         mqd->cp_hqd_persistent_state = tmp;
2965
2966         /* set MIN_IB_AVAIL_SIZE */
2967         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
2968         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
2969         mqd->cp_hqd_ib_control = tmp;
2970
2971         /* activate the queue */
2972         mqd->cp_hqd_active = 1;
2973
2974         return 0;
2975 }
2976
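/* The KIQ itself cannot be mapped with a MAP_QUEUES packet (there is no
 * queue to submit one on yet), so its HQD registers are programmed
 * directly from the MQD. */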
2977 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
2978 {
2979         struct amdgpu_device *adev = ring->adev;
2980         struct v9_mqd *mqd = ring->mqd_ptr;
2981         int j;
2982
2983         /* disable wptr polling */
2984         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
2985
2986         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
2987                mqd->cp_hqd_eop_base_addr_lo);
2988         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
2989                mqd->cp_hqd_eop_base_addr_hi);
2990
2991         /* set the EOP buffer size */