/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15.h"
#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmPWR_MISC_CNTL_STATUS                                  0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX                         0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT        0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT          0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK          0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK            0x00000006L

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

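/*
 * Per-ASIC "golden" register tables.  Each SOC15_REG_GOLDEN_VALUE entry names
 * a GC register, a mask of the bits to update and the value to set;
 * soc15_program_register_sequence() applies each entry as a masked
 * read-modify-write (a direct write when the mask covers the whole register).
 */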
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
                                 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);

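/*
 * Program the golden register tables that match the detected ASIC (with a
 * Raven vs. Raven2 split on rev_id), followed by the settings common to all
 * GFX9 parts.  On Vega10 the per-ASIC tables may be skipped under
 * virtualization when the host is expected to have programmed them already.
 */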
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_VEGA10:
                if (!amdgpu_virt_support_skip_setting(adev)) {
                        soc15_program_register_sequence(adev,
                                                         golden_settings_gc_9_0,
                                                         ARRAY_SIZE(golden_settings_gc_9_0));
                        soc15_program_register_sequence(adev,
                                                         golden_settings_gc_9_0_vg10,
                                                         ARRAY_SIZE(golden_settings_gc_9_0_vg10));
                }
                break;
        case CHIP_VEGA12:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_2_1,
                                                ARRAY_SIZE(golden_settings_gc_9_2_1));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_2_1_vg12,
                                                ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
                break;
        case CHIP_VEGA20:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0,
                                                ARRAY_SIZE(golden_settings_gc_9_0));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0_vg20,
                                                ARRAY_SIZE(golden_settings_gc_9_0_vg20));
                break;
        case CHIP_RAVEN:
                soc15_program_register_sequence(adev, golden_settings_gc_9_1,
                                                ARRAY_SIZE(golden_settings_gc_9_1));
                if (adev->rev_id >= 8)
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_1_rv2,
                                                        ARRAY_SIZE(golden_settings_gc_9_1_rv2));
                else
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_1_rv1,
                                                        ARRAY_SIZE(golden_settings_gc_9_1_rv1));
                break;
        default:
                break;
        }

        soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
                                        (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
        adev->gfx.scratch.num_reg = 8;
        adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
        adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

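/*
 * Emit a WRITE_DATA packet that writes @val to register @reg from the
 * selected engine; @wc requests a write confirmation before the packet
 * completes.
 */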
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
                                       bool wc, uint32_t reg, uint32_t val)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
                                WRITE_DATA_DST_SEL(0) |
                                (wc ? WR_CONFIRM : 0));
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, val);
}

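/*
 * Emit a WAIT_REG_MEM packet that stalls the selected engine until the
 * register, or the dword-aligned memory location given by addr0/addr1,
 * masked with @mask equals @ref, polling at the requested interval.
 */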
static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
                                  int mem_space, int opt, uint32_t addr0,
                                  uint32_t addr1, uint32_t ref, uint32_t mask,
                                  uint32_t inv)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring,
                                 /* memory (1) or register (0) */
                                 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
                                 WAIT_REG_MEM_OPERATION(opt) | /* wait */
                                 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
                                 WAIT_REG_MEM_ENGINE(eng_sel)));

        if (mem_space)
                BUG_ON(addr0 & 0x3); /* Dword align */
        amdgpu_ring_write(ring, addr0);
        amdgpu_ring_write(ring, addr1);
        amdgpu_ring_write(ring, ref);
        amdgpu_ring_write(ring, mask);
        amdgpu_ring_write(ring, inv); /* poll interval */
}

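/*
 * Basic ring test: seed a scratch register with 0xCAFEDEAD, submit a packet
 * on the ring that rewrites it to 0xDEADBEEF and poll until the new value
 * shows up or the usec timeout expires.
 */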
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        uint32_t scratch;
        uint32_t tmp = 0;
        unsigned i;
        int r;

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r)
                return r;

        WREG32(scratch, 0xCAFEDEAD);
        r = amdgpu_ring_alloc(ring, 3);
        if (r)
                goto error_free_scratch;

        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
        amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
        amdgpu_ring_write(ring, 0xDEADBEEF);
        amdgpu_ring_commit(ring);

        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
                        break;
                udelay(1);
        }

        if (i >= adev->usec_timeout)
                r = -ETIMEDOUT;

error_free_scratch:
        amdgpu_gfx_scratch_free(adev, scratch);
        return r;
}

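/*
 * Indirect-buffer test: build a one-packet IB that writes 0xDEADBEEF to a
 * writeback slot, schedule it on the ring and wait for its fence; seeing the
 * magic value in the slot confirms the CP actually executed the IB.
 */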
static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_ib ib;
        struct dma_fence *f = NULL;

        unsigned index;
        uint64_t gpu_addr;
        uint32_t tmp;
        long r;

        r = amdgpu_device_wb_get(adev, &index);
        if (r)
                return r;

        gpu_addr = adev->wb.gpu_addr + (index * 4);
        adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
        memset(&ib, 0, sizeof(ib));
        r = amdgpu_ib_get(adev, NULL, 16, &ib);
        if (r)
                goto err1;

        ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
        ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
        ib.ptr[2] = lower_32_bits(gpu_addr);
        ib.ptr[3] = upper_32_bits(gpu_addr);
        ib.ptr[4] = 0xDEADBEEF;
        ib.length_dw = 5;

        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
        if (r)
                goto err2;

        r = dma_fence_wait_timeout(f, false, timeout);
        if (r == 0) {
                r = -ETIMEDOUT;
                goto err2;
        } else if (r < 0) {
                goto err2;
        }

        tmp = adev->wb.wb[index];
        if (tmp == 0xDEADBEEF)
                r = 0;
        else
                r = -EINVAL;

err2:
        amdgpu_ib_free(adev, &ib, NULL);
        dma_fence_put(f);
err1:
        amdgpu_device_wb_free(adev, index);
        return r;
}


static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
        release_firmware(adev->gfx.pfp_fw);
        adev->gfx.pfp_fw = NULL;
        release_firmware(adev->gfx.me_fw);
        adev->gfx.me_fw = NULL;
        release_firmware(adev->gfx.ce_fw);
        adev->gfx.ce_fw = NULL;
        release_firmware(adev->gfx.rlc_fw);
        adev->gfx.rlc_fw = NULL;
        release_firmware(adev->gfx.mec_fw);
        adev->gfx.mec_fw = NULL;
        release_firmware(adev->gfx.mec2_fw);
        adev->gfx.mec2_fw = NULL;

        kfree(adev->gfx.rlc.register_list_format);
}

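/*
 * Parse the v2.1 RLC firmware header: record the versions, sizes and payload
 * pointers of the save/restore list CNTL, GPM and SRM blobs carried in the
 * same image.
 */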
static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
        const struct rlc_firmware_header_v2_1 *rlc_hdr;

        rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
        adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
        adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
        adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
        adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
        adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
        adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
        adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
        adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
        adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
        adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
        adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
        adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
        adev->gfx.rlc.reg_list_format_direct_reg_list_length =
                        le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}

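/*
 * Decide whether the loaded CP firmware is new enough for the driver to use
 * its register write-then-wait path: the me/mec "fw write wait" flags set
 * here are checked only when the ME/PFP/MEC versions meet the per-ASIC
 * minimums below.
 */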
static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
        adev->gfx.me_fw_write_wait = false;
        adev->gfx.mec_fw_write_wait = false;

        switch (adev->asic_type) {
        case CHIP_VEGA10:
                if ((adev->gfx.me_fw_version >= 0x0000009c) &&
                    (adev->gfx.me_feature_version >= 42) &&
                    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
                    (adev->gfx.pfp_feature_version >= 42))
                        adev->gfx.me_fw_write_wait = true;

                if ((adev->gfx.mec_fw_version >= 0x00000193) &&
                    (adev->gfx.mec_feature_version >= 42))
                        adev->gfx.mec_fw_write_wait = true;
                break;
        case CHIP_VEGA12:
                if ((adev->gfx.me_fw_version >= 0x0000009c) &&
                    (adev->gfx.me_feature_version >= 44) &&
                    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
                    (adev->gfx.pfp_feature_version >= 44))
                        adev->gfx.me_fw_write_wait = true;

                if ((adev->gfx.mec_fw_version >= 0x00000196) &&
                    (adev->gfx.mec_feature_version >= 44))
                        adev->gfx.mec_fw_write_wait = true;
                break;
        case CHIP_VEGA20:
                if ((adev->gfx.me_fw_version >= 0x0000009c) &&
                    (adev->gfx.me_feature_version >= 44) &&
                    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
                    (adev->gfx.pfp_feature_version >= 44))
                        adev->gfx.me_fw_write_wait = true;

                if ((adev->gfx.mec_fw_version >= 0x00000197) &&
                    (adev->gfx.mec_feature_version >= 44))
                        adev->gfx.mec_fw_write_wait = true;
                break;
        case CHIP_RAVEN:
                if ((adev->gfx.me_fw_version >= 0x0000009c) &&
                    (adev->gfx.me_feature_version >= 42) &&
                    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
                    (adev->gfx.pfp_feature_version >= 42))
                        adev->gfx.me_fw_write_wait = true;

                if ((adev->gfx.mec_fw_version >= 0x00000192) &&
                    (adev->gfx.mec_feature_version >= 42))
                        adev->gfx.mec_fw_write_wait = true;
                break;
        default:
                break;
        }
}

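/*
 * GFXOFF is only trusted on Raven with certain RLC firmware builds: clear
 * PP_GFXOFF_MASK when the RLC firmware/feature version is known not to
 * support it.  Raven2 and Picasso are left untouched here.
 */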
static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_VEGA10:
        case CHIP_VEGA12:
        case CHIP_VEGA20:
                break;
        case CHIP_RAVEN:
                if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
                        break;
                if ((adev->gfx.rlc_fw_version != 106 &&
                     adev->gfx.rlc_fw_version < 531) ||
                    (adev->gfx.rlc_fw_version == 53815) ||
                    (adev->gfx.rlc_feature_version < 1) ||
                    !adev->gfx.rlc.is_rlc_v2_1)
                        adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
                break;
        default:
                break;
        }
}

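/*
 * Fetch and validate all GFX microcode images (PFP, ME, CE, RLC, MEC and the
 * optional MEC2) for the detected chip, pull the version/feature numbers and
 * RLC register lists out of their headers, and register each image for PSP
 * front-door loading when that load type is in use.
 */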
static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
{
        const char *chip_name;
        char fw_name[30];
        int err;
        struct amdgpu_firmware_info *info = NULL;
        const struct common_firmware_header *header = NULL;
        const struct gfx_firmware_header_v1_0 *cp_hdr;
        const struct rlc_firmware_header_v2_0 *rlc_hdr;
        unsigned int *tmp = NULL;
        unsigned int i = 0;
        uint16_t version_major;
        uint16_t version_minor;
        uint32_t smu_version;

        DRM_DEBUG("\n");

        switch (adev->asic_type) {
        case CHIP_VEGA10:
                chip_name = "vega10";
                break;
        case CHIP_VEGA12:
                chip_name = "vega12";
                break;
        case CHIP_VEGA20:
                chip_name = "vega20";
                break;
        case CHIP_RAVEN:
                if (adev->rev_id >= 8)
                        chip_name = "raven2";
                else if (adev->pdev->device == 0x15d8)
                        chip_name = "picasso";
                else
                        chip_name = "raven";
                break;
        default:
                BUG();
        }

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
        err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
        adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
        err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.me_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
        adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
        err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.ce_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
        adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        /*
         * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
         * instead of picasso_rlc.bin.
         * Judgment method:
         * PCO AM4: revision >= 0xC8 && revision <= 0xCF
         *          or revision >= 0xD8 && revision <= 0xDF
         * otherwise is PCO FP5
         */
        if (!strcmp(chip_name, "picasso") &&
                (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
                ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
        else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
                (smu_version >= 0x41e2b))
                /*
                 * SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
                 */
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
        else
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
        err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
        if (err)
                goto out;
        rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;

        version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
        version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
        if (version_major == 2 && version_minor == 1)
                adev->gfx.rlc.is_rlc_v2_1 = true;

        adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
        adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
        adev->gfx.rlc.save_and_restore_offset =
                        le32_to_cpu(rlc_hdr->save_and_restore_offset);
        adev->gfx.rlc.clear_state_descriptor_offset =
                        le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
        adev->gfx.rlc.avail_scratch_ram_locations =
                        le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
        adev->gfx.rlc.reg_restore_list_size =
                        le32_to_cpu(rlc_hdr->reg_restore_list_size);
        adev->gfx.rlc.reg_list_format_start =
                        le32_to_cpu(rlc_hdr->reg_list_format_start);
        adev->gfx.rlc.reg_list_format_separate_start =
                        le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
        adev->gfx.rlc.starting_offsets_start =
                        le32_to_cpu(rlc_hdr->starting_offsets_start);
        adev->gfx.rlc.reg_list_format_size_bytes =
                        le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
        adev->gfx.rlc.reg_list_size_bytes =
                        le32_to_cpu(rlc_hdr->reg_list_size_bytes);
        adev->gfx.rlc.register_list_format =
                        kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
                                adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
        if (!adev->gfx.rlc.register_list_format) {
                err = -ENOMEM;
                goto out;
        }

        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
                        le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
        for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
                adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

        adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
                        le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
        for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
                adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

        if (adev->gfx.rlc.is_rlc_v2_1)
                gfx_v9_0_init_rlc_ext_microcode(adev);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
        err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.mec_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
        adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);


        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
        err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
        if (!err) {
                err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
                if (err)
                        goto out;
                cp_hdr = (const struct gfx_firmware_header_v1_0 *)
                        adev->gfx.mec2_fw->data;
                adev->gfx.mec2_fw_version =
                        le32_to_cpu(cp_hdr->header.ucode_version);
                adev->gfx.mec2_feature_version =
                        le32_to_cpu(cp_hdr->ucode_feature_version);
        } else {
                err = 0;
                adev->gfx.mec2_fw = NULL;
        }

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
                info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
                info->fw = adev->gfx.pfp_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
                info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
                info->fw = adev->gfx.me_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
                info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
                info->fw = adev->gfx.ce_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
                info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
                info->fw = adev->gfx.rlc_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                if (adev->gfx.rlc.is_rlc_v2_1 &&
                    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
                    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
                    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
                        info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
                        info->fw = adev->gfx.rlc_fw;
                        adev->firmware.fw_size +=
                                ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);

                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
                        info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
                        info->fw = adev->gfx.rlc_fw;
                        adev->firmware.fw_size +=
                                ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);

                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
                        info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
                        info->fw = adev->gfx.rlc_fw;
                        adev->firmware.fw_size +=
                                ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
                }

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
                info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
                info->fw = adev->gfx.mec_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
                info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
                info->fw = adev->gfx.mec_fw;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

                if (adev->gfx.mec2_fw) {
                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
                        info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
                        info->fw = adev->gfx.mec2_fw;
                        header = (const struct common_firmware_header *)info->fw->data;
                        cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
                        adev->firmware.fw_size +=
                                ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
                        info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
                        info->fw = adev->gfx.mec2_fw;
                        adev->firmware.fw_size +=
                                ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
                }

        }

out:
        gfx_v9_0_check_if_need_gfxoff(adev);
        gfx_v9_0_check_fw_write_wait(adev);
        if (err) {
                dev_err(adev->dev,
                        "gfx9: Failed to load firmware \"%s\"\n",
                        fw_name);
                release_firmware(adev->gfx.pfp_fw);
                adev->gfx.pfp_fw = NULL;
                release_firmware(adev->gfx.me_fw);
                adev->gfx.me_fw = NULL;
                release_firmware(adev->gfx.ce_fw);
                adev->gfx.ce_fw = NULL;
                release_firmware(adev->gfx.rlc_fw);
                adev->gfx.rlc_fw = NULL;
                release_firmware(adev->gfx.mec_fw);
                adev->gfx.mec_fw = NULL;
                release_firmware(adev->gfx.mec2_fw);
                adev->gfx.mec2_fw = NULL;
        }
        return err;
}

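/*
 * Size, in dwords, of the clear-state indirect buffer that
 * gfx_v9_0_get_csb_buffer() emits from the gfx9_cs_data tables.
 */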
static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
{
        u32 count = 0;
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;

        /* begin clear state */
        count += 2;
        /* context control state */
        count += 3;

        for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT)
                                count += 2 + ext->reg_count;
                        else
                                return 0;
                }
        }

        /* end clear state */
        count += 2;
        /* clear state */
        count += 2;

        return count;
}

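/*
 * Fill @buffer with the clear-state IB: preamble begin, context control,
 * one SET_CONTEXT_REG burst per extent of the cs_data tables, preamble end
 * and a final CLEAR_STATE packet.
 */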
static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
                                    volatile u32 *buffer)
{
        u32 count = 0, i;
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;

        if (adev->gfx.rlc.cs_data == NULL)
                return;
        if (buffer == NULL)
                return;

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        buffer[count++] = cpu_to_le32(0x80000000);
        buffer[count++] = cpu_to_le32(0x80000000);

        for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                buffer[count++] =
                                        cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
                                buffer[count++] = cpu_to_le32(ext->reg_index -
                                                PACKET3_SET_CONTEXT_REG_START);
                                for (i = 0; i < ext->reg_count; i++)
                                        buffer[count++] = cpu_to_le32(ext->extent[i]);
                        } else {
                                return;
                        }
                }
        }

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
        buffer[count++] = cpu_to_le32(0);
}

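/*
 * Pick the first N compute units in each SE/SH (N = 4 on APUs, 8 on Vega12,
 * 12 otherwise) as always-on: program the RLC always-active CU mask with
 * them, the first two as the power-gating always-on mask, and cache the
 * bitmap in cu_info->ao_cu_bitmap.
 */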
static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
{
        struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
        uint32_t pg_always_on_cu_num = 2;
        uint32_t always_on_cu_num;
        uint32_t i, j, k;
        uint32_t mask, cu_bitmap, counter;

        if (adev->flags & AMD_IS_APU)
                always_on_cu_num = 4;
        else if (adev->asic_type == CHIP_VEGA12)
                always_on_cu_num = 8;
        else
                always_on_cu_num = 12;

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
                        mask = 1;
                        cu_bitmap = 0;
                        counter = 0;
                        gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);

                        for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
                                if (cu_info->bitmap[i][j] & mask) {
                                        if (counter == pg_always_on_cu_num)
                                                WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
                                        if (counter < always_on_cu_num)
                                                cu_bitmap |= mask;
                                        else
                                                break;
                                        counter++;
                                }
                                mask <<= 1;
                        }

                        WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
                        cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
                }
        }
        gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        mutex_unlock(&adev->grbm_idx_mutex);
}

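/*
 * Configure load balancing per watt (LBPW) for Raven: thresholds, counters,
 * the per-CU init mask and RLC_LB_CNTL hysteresis settings, then refresh the
 * always-on CU masks.
 */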
static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
{
        uint32_t data;

        /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));

        /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
        WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

        /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
        WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);

        mutex_lock(&adev->grbm_idx_mutex);
        /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
        gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

        /* set mmRLC_LB_PARAMS = 0x003F_1006 */
        data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
        data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
        data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
        WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

        /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
        data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
        data &= 0x0000FFFF;
        data |= 0x00C00000;
        WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

        /*
         * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
         * programmed in gfx_v9_0_init_always_on_cu_mask()
         */

        /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
         * but used for RLC_LB_CNTL configuration */
        data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
        data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
        data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
        WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
        mutex_unlock(&adev->grbm_idx_mutex);

        gfx_v9_0_init_always_on_cu_mask(adev);
}

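/* Same LBPW setup as above, with the threshold/counter values used on Vega20. */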
static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
{
        uint32_t data;

        /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));

        /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
        WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

        /* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
        WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);

        mutex_lock(&adev->grbm_idx_mutex);
        /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
        gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

        /* set mmRLC_LB_PARAMS = 0x003F_1006 */
        data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
        data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
        data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
        WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

        /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
        data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
        data &= 0x0000FFFF;
        data |= 0x00C00000;
        WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

        /*
         * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
         * programmed in gfx_v9_0_init_always_on_cu_mask()
         */

        /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
         * but used for RLC_LB_CNTL configuration */
        data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
        data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
        data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
        WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
        mutex_unlock(&adev->grbm_idx_mutex);

        gfx_v9_0_init_always_on_cu_mask(adev);
}

static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
{
        WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
}

static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
{
        return 5;
}

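/*
 * RLC software init: point cs_data at the GFX9 clear-state tables, create the
 * clear-state buffer (and, on Raven, the CP jump table), and set up LBPW on
 * the parts that use it.
 */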
1115 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1116 {
1117         const struct cs_section_def *cs_data;
1118         int r;
1119
1120         adev->gfx.rlc.cs_data = gfx9_cs_data;
1121
1122         cs_data = adev->gfx.rlc.cs_data;
1123
1124         if (cs_data) {
1125                 /* init clear state block */
1126                 r = amdgpu_gfx_rlc_init_csb(adev);
1127                 if (r)
1128                         return r;
1129         }
1130
1131         if (adev->asic_type == CHIP_RAVEN) {
1132                 /* TODO: double check the cp_table_size for RV */
1133                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1134                 r = amdgpu_gfx_rlc_init_cpt(adev);
1135                 if (r)
1136                         return r;
1137         }
1138
1139         switch (adev->asic_type) {
1140         case CHIP_RAVEN:
1141                 gfx_v9_0_init_lbpw(adev);
1142                 break;
1143         case CHIP_VEGA20:
1144                 gfx_v9_4_init_lbpw(adev);
1145                 break;
1146         default:
1147                 break;
1148         }
1149
1150         return 0;
1151 }
1152
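/*
 * Pin the clear state buffer object in VRAM and cache its GPU address for
 * later use; the buffer is unpinned again by gfx_v9_0_csb_vram_unpin().
 */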
1153 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1154 {
1155         int r;
1156
1157         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1158         if (unlikely(r != 0))
1159                 return r;
1160
1161         r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1162                         AMDGPU_GEM_DOMAIN_VRAM);
1163         if (!r)
1164                 adev->gfx.rlc.clear_state_gpu_addr =
1165                         amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1166
1167         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1168
1169         return r;
1170 }
1171
1172 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1173 {
1174         int r;
1175
1176         if (!adev->gfx.rlc.clear_state_obj)
1177                 return;
1178
1179         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1180         if (likely(r == 0)) {
1181                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1182                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1183         }
1184 }
1185
1186 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1187 {
1188         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1189         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1190 }
1191
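/*
 * Allocate the MEC objects: one GFX9_MEC_HPD_SIZE HPD/EOP slot per enabled
 * compute ring in VRAM, plus a GTT buffer holding a copy of the MEC ucode.
 */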
1192 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1193 {
1194         int r;
1195         u32 *hpd;
1196         const __le32 *fw_data;
1197         unsigned fw_size;
1198         u32 *fw;
1199         size_t mec_hpd_size;
1200
1201         const struct gfx_firmware_header_v1_0 *mec_hdr;
1202
1203         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1204
1205         /* take ownership of the relevant compute queues */
1206         amdgpu_gfx_compute_queue_acquire(adev);
1207         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1208
1209         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1210                                       AMDGPU_GEM_DOMAIN_VRAM,
1211                                       &adev->gfx.mec.hpd_eop_obj,
1212                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1213                                       (void **)&hpd);
1214         if (r) {
1215                 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1216                 gfx_v9_0_mec_fini(adev);
1217                 return r;
1218         }
1219
1220         memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1221
1222         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1223         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1224
1225         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1226
1227         fw_data = (const __le32 *)
1228                 (adev->gfx.mec_fw->data +
1229                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1230         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1231
1232         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1233                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1234                                       &adev->gfx.mec.mec_fw_obj,
1235                                       &adev->gfx.mec.mec_fw_gpu_addr,
1236                                       (void **)&fw);
1237         if (r) {
1238                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1239                 gfx_v9_0_mec_fini(adev);
1240                 return r;
1241         }
1242
1243         memcpy(fw, fw_data, fw_size);
1244
1245         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1246         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1247
1248         return 0;
1249 }
1250
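/*
 * Wave/SIMD state is read through the SQ indirect register interface:
 * program SQ_IND_INDEX with the wave, SIMD and register index (FORCE_READ,
 * optionally AUTO_INCR), then read the value(s) back from SQ_IND_DATA.
 */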
1251 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1252 {
1253         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1254                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1255                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1256                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1257                 (SQ_IND_INDEX__FORCE_READ_MASK));
1258         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1259 }
1260
1261 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1262                            uint32_t wave, uint32_t thread,
1263                            uint32_t regno, uint32_t num, uint32_t *out)
1264 {
1265         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1266                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1267                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1268                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1269                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1270                 (SQ_IND_INDEX__FORCE_READ_MASK) |
1271                 (SQ_IND_INDEX__AUTO_INCR_MASK));
1272         while (num--)
1273                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1274 }
1275
1276 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1277 {
1278         /* type 1 wave data */
1279         dst[(*no_fields)++] = 1;
1280         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1281         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1282         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1283         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1284         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1285         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1286         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1287         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1288         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1289         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1290         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1291         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1292         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1293         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1294 }
1295
1296 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1297                                      uint32_t wave, uint32_t start,
1298                                      uint32_t size, uint32_t *dst)
1299 {
1300         wave_read_regs(
1301                 adev, simd, wave, 0,
1302                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1303 }
1304
1305 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1306                                      uint32_t wave, uint32_t thread,
1307                                      uint32_t start, uint32_t size,
1308                                      uint32_t *dst)
1309 {
1310         wave_read_regs(
1311                 adev, simd, wave, thread,
1312                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1313 }
1314
1315 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1316                                   u32 me, u32 pipe, u32 q, u32 vm)
1317 {
1318         soc15_grbm_select(adev, me, pipe, q, vm);
1319 }
1320
1321 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1322         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1323         .select_se_sh = &gfx_v9_0_select_se_sh,
1324         .read_wave_data = &gfx_v9_0_read_wave_data,
1325         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1326         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1327         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
1328 };
1329
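/*
 * Early gfx setup: pick the per-ASIC golden GB_ADDR_CONFIG and FIFO sizes,
 * then decode the GB_ADDR_CONFIG fields (pipes, banks, RBs per SE, SEs,
 * pipe interleave) into adev->gfx.config for later use.
 */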
1330 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1331 {
1332         u32 gb_addr_config;
1333         int err;
1334
1335         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1336
1337         switch (adev->asic_type) {
1338         case CHIP_VEGA10:
1339                 adev->gfx.config.max_hw_contexts = 8;
1340                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1341                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1342                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1343                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1344                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1345                 break;
1346         case CHIP_VEGA12:
1347                 adev->gfx.config.max_hw_contexts = 8;
1348                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1349                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1350                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1351                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1352                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1353                 DRM_INFO("fix gfx.config for vega12\n");
1354                 break;
1355         case CHIP_VEGA20:
1356                 adev->gfx.config.max_hw_contexts = 8;
1357                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1358                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1359                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1360                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1361                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1362                 gb_addr_config &= ~0xf3e777ff;
1363                 gb_addr_config |= 0x22014042;
1364                 /* check vbios table if gpu info is not available */
1365                 err = amdgpu_atomfirmware_get_gfx_info(adev);
1366                 if (err)
1367                         return err;
1368                 break;
1369         case CHIP_RAVEN:
1370                 adev->gfx.config.max_hw_contexts = 8;
1371                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1372                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1373                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1374                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1375                 if (adev->rev_id >= 8)
1376                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1377                 else
1378                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1379                 break;
1380         default:
1381                 BUG();
1382                 break;
1383         }
1384
1385         adev->gfx.config.gb_addr_config = gb_addr_config;
1386
1387         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1388                         REG_GET_FIELD(
1389                                         adev->gfx.config.gb_addr_config,
1390                                         GB_ADDR_CONFIG,
1391                                         NUM_PIPES);
1392
1393         adev->gfx.config.max_tile_pipes =
1394                 adev->gfx.config.gb_addr_config_fields.num_pipes;
1395
1396         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1397                         REG_GET_FIELD(
1398                                         adev->gfx.config.gb_addr_config,
1399                                         GB_ADDR_CONFIG,
1400                                         NUM_BANKS);
1401         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1402                         REG_GET_FIELD(
1403                                         adev->gfx.config.gb_addr_config,
1404                                         GB_ADDR_CONFIG,
1405                                         MAX_COMPRESSED_FRAGS);
1406         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1407                         REG_GET_FIELD(
1408                                         adev->gfx.config.gb_addr_config,
1409                                         GB_ADDR_CONFIG,
1410                                         NUM_RB_PER_SE);
1411         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1412                         REG_GET_FIELD(
1413                                         adev->gfx.config.gb_addr_config,
1414                                         GB_ADDR_CONFIG,
1415                                         NUM_SHADER_ENGINES);
1416         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1417                         REG_GET_FIELD(
1418                                         adev->gfx.config.gb_addr_config,
1419                                         GB_ADDR_CONFIG,
1420                                         PIPE_INTERLEAVE_SIZE));
1421
1422         return 0;
1423 }
1424
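/*
 * Allocate one NGG buffer in VRAM, sized as size_se (or default_size_se
 * when size_se is 0) scaled by the number of shader engines.
 */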
1425 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1426                                    struct amdgpu_ngg_buf *ngg_buf,
1427                                    int size_se,
1428                                    int default_size_se)
1429 {
1430         int r;
1431
1432         if (size_se < 0) {
1433                 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1434                 return -EINVAL;
1435         }
1436         size_se = size_se ? size_se : default_size_se;
1437
1438         ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1439         r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1440                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1441                                     &ngg_buf->bo,
1442                                     &ngg_buf->gpu_addr,
1443                                     NULL);
1444         if (r) {
1445                 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1446                 return r;
1447         }
1448         ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1449
1450         return r;
1451 }
1452
1453 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1454 {
1455         int i;
1456
1457         for (i = 0; i < NGG_BUF_MAX; i++)
1458                 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1459                                       &adev->gfx.ngg.buf[i].gpu_addr,
1460                                       NULL);
1461
1462         memset(&adev->gfx.ngg.buf[0], 0,
1463                         sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1464
1465         adev->gfx.ngg.init = false;
1466
1467         return 0;
1468 }
1469
1470 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
1471 {
1472         int r;
1473
1474         if (!amdgpu_ngg || adev->gfx.ngg.init)
1475                 return 0;
1476
1477         /* GDS reserve memory: 64-byte alignment */
1478         adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
1479         adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
1480         adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
1481         adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
1482
1483         /* Primitive Buffer */
1484         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
1485                                     amdgpu_prim_buf_per_se,
1486                                     64 * 1024);
1487         if (r) {
1488                 dev_err(adev->dev, "Failed to create Primitive Buffer\n");
1489                 goto err;
1490         }
1491
1492         /* Position Buffer */
1493         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
1494                                     amdgpu_pos_buf_per_se,
1495                                     256 * 1024);
1496         if (r) {
1497                 dev_err(adev->dev, "Failed to create Position Buffer\n");
1498                 goto err;
1499         }
1500
1501         /* Control Sideband */
1502         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
1503                                     amdgpu_cntl_sb_buf_per_se,
1504                                     256);
1505         if (r) {
1506                 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
1507                 goto err;
1508         }
1509
1510         /* Parameter Cache, not created by default */
1511         if (amdgpu_param_buf_per_se <= 0)
1512                 goto out;
1513
1514         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
1515                                     amdgpu_param_buf_per_se,
1516                                     512 * 1024);
1517         if (r) {
1518                 dev_err(adev->dev, "Failed to create Parameter Cache\n");
1519                 goto err;
1520         }
1521
1522 out:
1523         adev->gfx.ngg.init = true;
1524         return 0;
1525 err:
1526         gfx_v9_0_ngg_fini(adev);
1527         return r;
1528 }
1529
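/*
 * Enable NGG: program the WD buffer sizes and base addresses from the
 * buffers created in gfx_v9_0_ngg_init(), then clear the GDS reserved
 * region with a DMA_DATA packet submitted on the first gfx ring.
 */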
1530 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
1531 {
1532         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
1533         int r;
1534         u32 data, base;
1535
1536         if (!amdgpu_ngg)
1537                 return 0;
1538
1539         /* Program buffer size */
1540         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
1541                              adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
1542         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
1543                              adev->gfx.ngg.buf[NGG_POS].size >> 8);
1544         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
1545
1546         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
1547                              adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
1548         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
1549                              adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
1550         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
1551
1552         /* Program buffer base address */
1553         base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1554         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
1555         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
1556
1557         base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1558         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
1559         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
1560
1561         base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1562         data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
1563         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
1564
1565         base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1566         data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
1567         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
1568
1569         base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1570         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
1571         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
1572
1573         base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1574         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
1575         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
1576
1577         /* Clear GDS reserved memory */
1578         r = amdgpu_ring_alloc(ring, 17);
1579         if (r) {
1580                 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
1581                           ring->name, r);
1582                 return r;
1583         }
1584
1585         gfx_v9_0_write_data_to_reg(ring, 0, false,
1586                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
1587                                    (adev->gds.gds_size +
1588                                     adev->gfx.ngg.gds_reserve_size));
1589
1590         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
1591         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
1592                                 PACKET3_DMA_DATA_DST_SEL(1) |
1593                                 PACKET3_DMA_DATA_SRC_SEL(2)));
1594         amdgpu_ring_write(ring, 0);
1595         amdgpu_ring_write(ring, 0);
1596         amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
1597         amdgpu_ring_write(ring, 0);
1598         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
1599                                 adev->gfx.ngg.gds_reserve_size);
1600
1601         gfx_v9_0_write_data_to_reg(ring, 0, false,
1602                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
1603
1604         amdgpu_ring_commit(ring);
1605
1606         return 0;
1607 }
1608
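/*
 * Initialize one compute ring: map it onto its MEC/pipe/queue, point it at
 * its HPD/EOP slot, assign a doorbell, and hook it to the matching EOP IRQ.
 */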
1609 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1610                                       int mec, int pipe, int queue)
1611 {
1612         int r;
1613         unsigned irq_type;
1614         struct amdgpu_ring *ring;
1615
1616         ring = &adev->gfx.compute_ring[ring_id];
1617
1618         /* mec0 is me1 */
1619         ring->me = mec + 1;
1620         ring->pipe = pipe;
1621         ring->queue = queue;
1622
1623         ring->ring_obj = NULL;
1624         ring->use_doorbell = true;
1625         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1626         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1627                                 + (ring_id * GFX9_MEC_HPD_SIZE);
1628         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1629
1630         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1631                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1632                 + ring->pipe;
1633
1634         /* type-2 packets are deprecated on MEC, use type-3 instead */
1635         r = amdgpu_ring_init(adev, ring, 1024,
1636                              &adev->gfx.eop_irq, irq_type);
1637         if (r)
1638                 return r;
1639
1640
1641         return 0;
1642 }
1643
1644 static int gfx_v9_0_sw_init(void *handle)
1645 {
1646         int i, j, k, r, ring_id;
1647         struct amdgpu_ring *ring;
1648         struct amdgpu_kiq *kiq;
1649         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1650
1651         switch (adev->asic_type) {
1652         case CHIP_VEGA10:
1653         case CHIP_VEGA12:
1654         case CHIP_VEGA20:
1655         case CHIP_RAVEN:
1656                 adev->gfx.mec.num_mec = 2;
1657                 break;
1658         default:
1659                 adev->gfx.mec.num_mec = 1;
1660                 break;
1661         }
1662
1663         adev->gfx.mec.num_pipe_per_mec = 4;
1664         adev->gfx.mec.num_queue_per_pipe = 8;
1665
1666         /* EOP Event */
1667         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
1668         if (r)
1669                 return r;
1670
1671         /* Privileged reg */
1672         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
1673                               &adev->gfx.priv_reg_irq);
1674         if (r)
1675                 return r;
1676
1677         /* Privileged inst */
1678         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
1679                               &adev->gfx.priv_inst_irq);
1680         if (r)
1681                 return r;
1682
1683         /* ECC error */
1684         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
1685                               &adev->gfx.cp_ecc_error_irq);
1686         if (r)
1687                 return r;
1688
1689         /* FUE error */
1690         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
1691                               &adev->gfx.cp_ecc_error_irq);
1692         if (r)
1693                 return r;
1694
1695         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1696
1697         gfx_v9_0_scratch_init(adev);
1698
1699         r = gfx_v9_0_init_microcode(adev);
1700         if (r) {
1701                 DRM_ERROR("Failed to load gfx firmware!\n");
1702                 return r;
1703         }
1704
1705         r = adev->gfx.rlc.funcs->init(adev);
1706         if (r) {
1707                 DRM_ERROR("Failed to init rlc BOs!\n");
1708                 return r;
1709         }
1710
1711         r = gfx_v9_0_mec_init(adev);
1712         if (r) {
1713                 DRM_ERROR("Failed to init MEC BOs!\n");
1714                 return r;
1715         }
1716
1717         /* set up the gfx ring */
1718         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1719                 ring = &adev->gfx.gfx_ring[i];
1720                 ring->ring_obj = NULL;
1721                 if (!i)
1722                         sprintf(ring->name, "gfx");
1723                 else
1724                         sprintf(ring->name, "gfx_%d", i);
1725                 ring->use_doorbell = true;
1726                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
1727                 r = amdgpu_ring_init(adev, ring, 1024,
1728                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
1729                 if (r)
1730                         return r;
1731         }
1732
1733         /* set up the compute queues - allocate horizontally across pipes */
1734         ring_id = 0;
1735         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1736                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1737                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1738                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
1739                                         continue;
1740
1741                                 r = gfx_v9_0_compute_ring_init(adev,
1742                                                                ring_id,
1743                                                                i, k, j);
1744                                 if (r)
1745                                         return r;
1746
1747                                 ring_id++;
1748                         }
1749                 }
1750         }
1751
1752         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
1753         if (r) {
1754                 DRM_ERROR("Failed to init KIQ BOs!\n");
1755                 return r;
1756         }
1757
1758         kiq = &adev->gfx.kiq;
1759         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1760         if (r)
1761                 return r;
1762
1763         /* create MQD for all compute queues as well as KIQ for the SRIOV case */
1764         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
1765         if (r)
1766                 return r;
1767
1768         adev->gfx.ce_ram_size = 0x8000;
1769
1770         r = gfx_v9_0_gpu_early_init(adev);
1771         if (r)
1772                 return r;
1773
1774         r = gfx_v9_0_ngg_init(adev);
1775         if (r)
1776                 return r;
1777
1778         return 0;
1779 }
1780
1781
1782 static int gfx_v9_0_sw_fini(void *handle)
1783 {
1784         int i;
1785         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1786
1787         if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
1788                         adev->gfx.ras_if) {
1789                 struct ras_common_if *ras_if = adev->gfx.ras_if;
1790                 struct ras_ih_if ih_info = {
1791                         .head = *ras_if,
1792                 };
1793
1794                 amdgpu_ras_debugfs_remove(adev, ras_if);
1795                 amdgpu_ras_sysfs_remove(adev, ras_if);
1796                 amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
1797                 amdgpu_ras_feature_enable(adev, ras_if, 0);
1798                 kfree(ras_if);
1799         }
1800
1801         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1802                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1803         for (i = 0; i < adev->gfx.num_compute_rings; i++)
1804                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1805
1806         amdgpu_gfx_mqd_sw_fini(adev);
1807         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
1808         amdgpu_gfx_kiq_fini(adev);
1809
1810         gfx_v9_0_mec_fini(adev);
1811         gfx_v9_0_ngg_fini(adev);
1812         amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1813         if (adev->asic_type == CHIP_RAVEN) {
1814                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
1815                                 &adev->gfx.rlc.cp_table_gpu_addr,
1816                                 (void **)&adev->gfx.rlc.cp_table_ptr);
1817         }
1818         gfx_v9_0_free_microcode(adev);
1819
1820         return 0;
1821 }
1822
1823
1824 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
1825 {
1826         /* TODO */
1827 }
1828
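/*
 * Select which SE/SH/instance subsequent GRBM-indexed register accesses
 * target; 0xffffffff for a field selects broadcast to all of them.
 */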
1829 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
1830 {
1831         u32 data;
1832
1833         if (instance == 0xffffffff)
1834                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1835         else
1836                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
1837
1838         if (se_num == 0xffffffff)
1839                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
1840         else
1841                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1842
1843         if (sh_num == 0xffffffff)
1844                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
1845         else
1846                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
1847
1848         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
1849 }
1850
1851 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1852 {
1853         u32 data, mask;
1854
1855         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
1856         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
1857
1858         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1859         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1860
1861         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1862                                          adev->gfx.config.max_sh_per_se);
1863
1864         return (~data) & mask;
1865 }
1866
1867 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
1868 {
1869         int i, j;
1870         u32 data;
1871         u32 active_rbs = 0;
1872         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1873                                         adev->gfx.config.max_sh_per_se;
1874
1875         mutex_lock(&adev->grbm_idx_mutex);
1876         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1877                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1878                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1879                         data = gfx_v9_0_get_rb_active_bitmap(adev);
1880                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1881                                                rb_bitmap_width_per_sh);
1882                 }
1883         }
1884         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1885         mutex_unlock(&adev->grbm_idx_mutex);
1886
1887         adev->gfx.config.backend_enable_mask = active_rbs;
1888         adev->gfx.config.num_rbs = hweight32(active_rbs);
1889 }
1890
1891 #define DEFAULT_SH_MEM_BASES    (0x6000)
1892 #define FIRST_COMPUTE_VMID      (8)
1893 #define LAST_COMPUTE_VMID       (16)
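/*
 * Program SH_MEM_CONFIG/SH_MEM_BASES for compute VMIDs 8..15 so they all
 * get the fixed aperture layout described in the comment below.
 */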
1894 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
1895 {
1896         int i;
1897         uint32_t sh_mem_config;
1898         uint32_t sh_mem_bases;
1899
1900         /*
1901          * Configure apertures:
1902          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1903          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1904          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1905          */
1906         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
1907
1908         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
1909                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
1910                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
1911
1912         mutex_lock(&adev->srbm_mutex);
1913         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1914                 soc15_grbm_select(adev, 0, 0, 0, i);
1915                 /* CP and shaders */
1916                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
1917                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
1918         }
1919         soc15_grbm_select(adev, 0, 0, 0, 0);
1920         mutex_unlock(&adev->srbm_mutex);
1921
1922         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
1923          * access. These should be enabled by FW for the target VMIDs. */
1924         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1925                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
1926                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
1927                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
1928                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
1929         }
1930 }
1931
1932 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
1933 {
1934         u32 tmp;
1935         int i;
1936
1937         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1938
1939         gfx_v9_0_tiling_mode_table_init(adev);
1940
1941         gfx_v9_0_setup_rb(adev);
1942         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
1943         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
1944
1945         /* XXX SH_MEM regs */
1946         /* where to put LDS, scratch, GPUVM in FSA64 space */
1947         mutex_lock(&adev->srbm_mutex);
1948         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
1949                 soc15_grbm_select(adev, 0, 0, 0, i);
1950                 /* CP and shaders */
1951                 if (i == 0) {
1952                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1953                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1954                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
1955                                             !!amdgpu_noretry);
1956                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1957                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
1958                 } else {
1959                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1960                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1961                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
1962                                             !!amdgpu_noretry);
1963                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1964                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1965                                 (adev->gmc.private_aperture_start >> 48));
1966                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1967                                 (adev->gmc.shared_aperture_start >> 48));
1968                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
1969                 }
1970         }
1971         soc15_grbm_select(adev, 0, 0, 0, 0);
1972
1973         mutex_unlock(&adev->srbm_mutex);
1974
1975         gfx_v9_0_init_compute_vmid(adev);
1976 }
1977
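/*
 * Wait for the RLC serdes to go idle: poll the per-SE/SH CU master busy
 * status for every SE/SH, then the non-CU (SE/GC/TC) master busy bits.
 */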
1978 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
1979 {
1980         u32 i, j, k;
1981         u32 mask;
1982
1983         mutex_lock(&adev->grbm_idx_mutex);
1984         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1985                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1986                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1987                         for (k = 0; k < adev->usec_timeout; k++) {
1988                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
1989                                         break;
1990                                 udelay(1);
1991                         }
1992                         if (k == adev->usec_timeout) {
1993                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
1994                                                       0xffffffff, 0xffffffff);
1995                                 mutex_unlock(&adev->grbm_idx_mutex);
1996                                 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
1997                                          i, j);
1998                                 return;
1999                         }
2000                 }
2001         }
2002         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2003         mutex_unlock(&adev->grbm_idx_mutex);
2004
2005         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2006                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2007                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2008                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2009         for (k = 0; k < adev->usec_timeout; k++) {
2010                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2011                         break;
2012                 udelay(1);
2013         }
2014 }
2015
2016 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2017                                                bool enable)
2018 {
2019         u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2020
2021         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2022         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2023         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2024         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2025
2026         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2027 }
2028
2029 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2030 {
2031         /* csib */
2032         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2033                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2034         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2035                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2036         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2037                         adev->gfx.rlc.clear_state_size);
2038 }
2039
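/*
 * Walk the RLC register_list_format blob: record where each indirect block
 * starts and collect the set of unique indirect register offsets (each
 * block is terminated by a 0xFFFFFFFF marker).
 */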
2040 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2041                                 int indirect_offset,
2042                                 int list_size,
2043                                 int *unique_indirect_regs,
2044                                 int unique_indirect_reg_count,
2045                                 int *indirect_start_offsets,
2046                                 int *indirect_start_offsets_count,
2047                                 int max_start_offsets_count)
2048 {
2049         int idx;
2050
2051         for (; indirect_offset < list_size; indirect_offset++) {
2052                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2053                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2054                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2055
2056                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2057                         indirect_offset += 2;
2058
2059                         /* look for the matching index */
2060                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2061                                 if (unique_indirect_regs[idx] ==
2062                                         register_list_format[indirect_offset] ||
2063                                         !unique_indirect_regs[idx])
2064                                         break;
2065                         }
2066
2067                         BUG_ON(idx >= unique_indirect_reg_count);
2068
2069                         if (!unique_indirect_regs[idx])
2070                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2071
2072                         indirect_offset++;
2073                 }
2074         }
2075 }
2076
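/*
 * Program the RLC save/restore machine: upload the register restore table
 * to SRM ARAM, the direct and indirect register lists plus starting offsets
 * to RLC GPM scratch RAM, and the unique indirect registers to the
 * SRM INDEX_CNTL address/data register pairs.
 */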
2077 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2078 {
2079         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2080         int unique_indirect_reg_count = 0;
2081
2082         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2083         int indirect_start_offsets_count = 0;
2084
2085         int list_size = 0;
2086         int i = 0, j = 0;
2087         u32 tmp = 0;
2088
2089         u32 *register_list_format =
2090                 kmemdup(adev->gfx.rlc.register_list_format,
2091                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2092         if (!register_list_format)
2093                 return -ENOMEM;
2094
2095         /* setup unique_indirect_regs array and indirect_start_offsets array */
2096         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2097         gfx_v9_1_parse_ind_reg_list(register_list_format,
2098                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2099                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2100                                     unique_indirect_regs,
2101                                     unique_indirect_reg_count,
2102                                     indirect_start_offsets,
2103                                     &indirect_start_offsets_count,
2104                                     ARRAY_SIZE(indirect_start_offsets));
2105
2106         /* enable auto inc in case it is disabled */
2107         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2108         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2109         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2110
2111         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2112         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2113                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2114         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2115                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2116                         adev->gfx.rlc.register_restore[i]);
2117
2118         /* load indirect register */
2119         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2120                 adev->gfx.rlc.reg_list_format_start);
2121
2122         /* direct register portion */
2123         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2124                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2125                         register_list_format[i]);
2126
2127         /* indirect register portion */
2128         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2129                 if (register_list_format[i] == 0xFFFFFFFF) {
2130                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2131                         continue;
2132                 }
2133
2134                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2135                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2136
2137                 for (j = 0; j < unique_indirect_reg_count; j++) {
2138                         if (register_list_format[i] == unique_indirect_regs[j]) {
2139                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2140                                 break;
2141                         }
2142                 }
2143
2144                 BUG_ON(j >= unique_indirect_reg_count);
2145
2146                 i++;
2147         }
2148
2149         /* set save/restore list size */
2150         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2151         list_size = list_size >> 1;
2152         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2153                 adev->gfx.rlc.reg_restore_list_size);
2154         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2155
2156         /* write the starting offsets to RLC scratch ram */
2157         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2158                 adev->gfx.rlc.starting_offsets_start);
2159         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2160                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2161                        indirect_start_offsets[i]);
2162
2163         /* load unique indirect regs*/
2164         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2165                 if (unique_indirect_regs[i] != 0) {
2166                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2167                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2168                                unique_indirect_regs[i] & 0x3FFFF);
2169
2170                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2171                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2172                                unique_indirect_regs[i] >> 20);
2173                 }
2174         }
2175
2176         kfree(register_list_format);
2177         return 0;
2178 }
2179
2180 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2181 {
2182         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2183 }
2184
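/*
 * Toggle RLC/GFXIP control over CGPG through PWR_MISC_CNTL_STATUS and,
 * when enabling, update the GFXOFF status field accordingly.
 */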
2185 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2186                                              bool enable)
2187 {
2188         uint32_t data = 0;
2189         uint32_t default_data = 0;
2190
2191         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2192         if (enable) {
2193                 /* enable GFXIP control over CGPG */
2194                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2195                 if (default_data != data)
2196                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2197
2198                 /* update status */
2199                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2200                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2201                 if (default_data != data)
2202                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2203         } else {
2204                 /* restore GFXIP control over CGPG */
2205                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2206                 if (default_data != data)
2207                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2208         }
2209 }
2210
2211 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2212 {
2213         uint32_t data = 0;
2214
2215         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2216                               AMD_PG_SUPPORT_GFX_SMG |
2217                               AMD_PG_SUPPORT_GFX_DMG)) {
2218                 /* init IDLE_POLL_COUNT = 60 */
2219                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2220                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2221                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2222                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2223
2224                 /* init RLC PG Delay */
2225                 data = 0;
2226                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2227                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2228                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2229                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2230                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2231
2232                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2233                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2234                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2235                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2236
2237                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2238                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2239                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2240                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2241
2242                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2243                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2244
2245                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2246                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2247                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2248
2249                 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2250         }
2251 }
2252
2253 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2254                                                 bool enable)
2255 {
2256         uint32_t data = 0;
2257         uint32_t default_data = 0;
2258
2259         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2260         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2261                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2262                              enable ? 1 : 0);
2263         if (default_data != data)
2264                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2265 }
2266
2267 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2268                                                 bool enable)
2269 {
2270         uint32_t data = 0;
2271         uint32_t default_data = 0;
2272
2273         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2274         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2275                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2276                              enable ? 1 : 0);
2277         if (default_data != data)
2278                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2279 }
2280
2281 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2282                                         bool enable)
2283 {
2284         uint32_t data = 0;
2285         uint32_t default_data = 0;
2286
2287         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2288         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2289                              CP_PG_DISABLE,
2290                              enable ? 0 : 1);
2291         if (default_data != data)
2292                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2293 }
2294
2295 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2296                                                 bool enable)
2297 {
2298         uint32_t data, default_data;
2299
2300         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2301         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2302                              GFX_POWER_GATING_ENABLE,
2303                              enable ? 1 : 0);
2304         if (default_data != data)
2305                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2306 }
2307
2308 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2309                                                 bool enable)
2310 {
2311         uint32_t data, default_data;
2312
2313         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2314         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2315                              GFX_PIPELINE_PG_ENABLE,
2316                              enable ? 1 : 0);
2317         if (default_data != data)
2318                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2319
2320         if (!enable)
2321                 /* read any GFX register to wake up GFX */
2322                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2323 }
2324
2325 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2326                                                        bool enable)
2327 {
2328         uint32_t data, default_data;
2329
2330         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2331         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2332                              STATIC_PER_CU_PG_ENABLE,
2333                              enable ? 1 : 0);
2334         if (default_data != data)
2335                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2336 }
2337
2338 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2339                                                 bool enable)
2340 {
2341         uint32_t data, default_data;
2342
2343         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2344         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2345                              DYN_PER_CU_PG_ENABLE,
2346                              enable ? 1 : 0);
2347         if (default_data != data)
2348                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2349 }
2350
2351 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2352 {
2353         gfx_v9_0_init_csb(adev);
2354
2355         /*
2356          * The RLC save/restore list is only available from RLC firmware
2357          * v2_1 onwards, and it is required by the gfxoff feature.
2358          */
2359         if (adev->gfx.rlc.is_rlc_v2_1) {
2360                 gfx_v9_1_init_rlc_save_restore_list(adev);
2361                 gfx_v9_0_enable_save_restore_machine(adev);
2362         }
2363
2364         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2365                               AMD_PG_SUPPORT_GFX_SMG |
2366                               AMD_PG_SUPPORT_GFX_DMG |
2367                               AMD_PG_SUPPORT_CP |
2368                               AMD_PG_SUPPORT_GDS |
2369                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
2370                 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2371                        adev->gfx.rlc.cp_table_gpu_addr >> 8);
2372                 gfx_v9_0_init_gfx_power_gating(adev);
2373         }
2374 }
2375
2376 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2377 {
2378         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2379         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2380         gfx_v9_0_wait_for_rlc_serdes(adev);
2381 }
2382
2383 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2384 {
2385         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2386         udelay(50);
2387         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2388         udelay(50);
2389 }
2390
2391 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2392 {
2393 #ifdef AMDGPU_RLC_DEBUG_RETRY
2394         u32 rlc_ucode_ver;
2395 #endif
2396
2397         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2398         udelay(50);
2399
2400         /* on APUs the CP interrupt is enabled only after the CP has been initialized */
2401         if (!(adev->flags & AMD_IS_APU)) {
2402                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2403                 udelay(50);
2404         }
2405
2406 #ifdef AMDGPU_RLC_DEBUG_RETRY
2407         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2408         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2409         if (rlc_ucode_ver == 0x108) {
2410                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2411                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2412                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2413                  * default is 0x9C4 to create a 100us interval */
2414                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2415                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2416                  * to disable the page fault retry interrupts, default is
2417                  * 0x100 (256) */
2418                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2419         }
2420 #endif
2421 }
2422
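/*
 * Legacy (non-PSP) RLC firmware load: stream the RLCG ucode into
 * mmRLC_GPM_UCODE_DATA starting from RLCG_UCODE_LOADING_START_ADDRESS,
 * then write the firmware version back to the ADDR register.
 */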
2423 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2424 {
2425         const struct rlc_firmware_header_v2_0 *hdr;
2426         const __le32 *fw_data;
2427         unsigned i, fw_size;
2428
2429         if (!adev->gfx.rlc_fw)
2430                 return -EINVAL;
2431
2432         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2433         amdgpu_ucode_print_rlc_hdr(&hdr->header);
2434
2435         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2436                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2437         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2438
2439         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2440                         RLCG_UCODE_LOADING_START_ADDRESS);
2441         for (i = 0; i < fw_size; i++)
2442                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2443         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2444
2445         return 0;
2446 }
2447
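/*
 * Bring the RLC back up. Under SRIOV only the CSB is re-initialized;
 * otherwise the RLC is stopped, CGCG/CGLS is disabled, power gating is
 * (re)programmed, the RLC ucode is loaded when PSP loading is not used,
 * LBPW is configured per ASIC and the RLC is restarted.
 */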
2448 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2449 {
2450         int r;
2451
2452         if (amdgpu_sriov_vf(adev)) {
2453                 gfx_v9_0_init_csb(adev);
2454                 return 0;
2455         }
2456
2457         adev->gfx.rlc.funcs->stop(adev);
2458
2459         /* disable CG */
2460         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2461
2462         gfx_v9_0_init_pg(adev);
2463
2464         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2465                 /* legacy rlc firmware loading */
2466                 r = gfx_v9_0_rlc_load_microcode(adev);
2467                 if (r)
2468                         return r;
2469         }
2470
2471         switch (adev->asic_type) {
2472         case CHIP_RAVEN:
2473                 if (amdgpu_lbpw == 0)
2474                         gfx_v9_0_enable_lbpw(adev, false);
2475                 else
2476                         gfx_v9_0_enable_lbpw(adev, true);
2477                 break;
2478         case CHIP_VEGA20:
2479                 if (amdgpu_lbpw > 0)
2480                         gfx_v9_0_enable_lbpw(adev, true);
2481                 else
2482                         gfx_v9_0_enable_lbpw(adev, false);
2483                 break;
2484         default:
2485                 break;
2486         }
2487
2488         adev->gfx.rlc.funcs->start(adev);
2489
2490         return 0;
2491 }
2492
2493 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2494 {
2495         int i;
2496         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2497
2498         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2499         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2500         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2501         if (!enable) {
2502                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2503                         adev->gfx.gfx_ring[i].sched.ready = false;
2504         }
2505         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2506         udelay(50);
2507 }
2508
2509 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2510 {
2511         const struct gfx_firmware_header_v1_0 *pfp_hdr;
2512         const struct gfx_firmware_header_v1_0 *ce_hdr;
2513         const struct gfx_firmware_header_v1_0 *me_hdr;
2514         const __le32 *fw_data;
2515         unsigned i, fw_size;
2516
2517         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2518                 return -EINVAL;
2519
2520         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2521                 adev->gfx.pfp_fw->data;
2522         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2523                 adev->gfx.ce_fw->data;
2524         me_hdr = (const struct gfx_firmware_header_v1_0 *)
2525                 adev->gfx.me_fw->data;
2526
2527         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2528         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2529         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2530
2531         gfx_v9_0_cp_gfx_enable(adev, false);
2532
2533         /* PFP */
2534         fw_data = (const __le32 *)
2535                 (adev->gfx.pfp_fw->data +
2536                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2537         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2538         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2539         for (i = 0; i < fw_size; i++)
2540                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2541         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2542
2543         /* CE */
2544         fw_data = (const __le32 *)
2545                 (adev->gfx.ce_fw->data +
2546                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2547         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2548         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
2549         for (i = 0; i < fw_size; i++)
2550                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2551         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2552
2553         /* ME */
2554         fw_data = (const __le32 *)
2555                 (adev->gfx.me_fw->data +
2556                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2557         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2558         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
2559         for (i = 0; i < fw_size; i++)
2560                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2561         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2562
2563         return 0;
2564 }
2565
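/*
 * Prime the gfx ring with the clear-state preamble: context control, the
 * SECT_CONTEXT register extents from gfx9_cs_data, a CLEAR_STATE packet
 * and the CE partition bases, then commit the ring.
 */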
2566 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2567 {
2568         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2569         const struct cs_section_def *sect = NULL;
2570         const struct cs_extent_def *ext = NULL;
2571         int r, i, tmp;
2572
2573         /* init the CP */
2574         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2575         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2576
2577         gfx_v9_0_cp_gfx_enable(adev, true);
2578
2579         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
2580         if (r) {
2581                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2582                 return r;
2583         }
2584
2585         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2586         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2587
2588         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2589         amdgpu_ring_write(ring, 0x80000000);
2590         amdgpu_ring_write(ring, 0x80000000);
2591
2592         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2593                 for (ext = sect->section; ext->extent != NULL; ++ext) {
2594                         if (sect->id == SECT_CONTEXT) {
2595                                 amdgpu_ring_write(ring,
2596                                        PACKET3(PACKET3_SET_CONTEXT_REG,
2597                                                ext->reg_count));
2598                                 amdgpu_ring_write(ring,
2599                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2600                                 for (i = 0; i < ext->reg_count; i++)
2601                                         amdgpu_ring_write(ring, ext->extent[i]);
2602                         }
2603                 }
2604         }
2605
2606         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2607         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2608
2609         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2610         amdgpu_ring_write(ring, 0);
2611
2612         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2613         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2614         amdgpu_ring_write(ring, 0x8000);
2615         amdgpu_ring_write(ring, 0x8000);
2616
2617         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2618         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
2619                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
2620         amdgpu_ring_write(ring, tmp);
2621         amdgpu_ring_write(ring, 0);
2622
2623         amdgpu_ring_commit(ring);
2624
2625         return 0;
2626 }
2627
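/*
 * Program the gfx ring buffer (CP_RB0_*): buffer size, rptr/wptr
 * write-back addresses, ring base address and the doorbell range, then
 * start the ring via gfx_v9_0_cp_gfx_start().
 */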
2628 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
2629 {
2630         struct amdgpu_ring *ring;
2631         u32 tmp;
2632         u32 rb_bufsz;
2633         u64 rb_addr, rptr_addr, wptr_gpu_addr;
2634
2635         /* Set the write pointer delay */
2636         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
2637
2638         /* set the RB to use vmid 0 */
2639         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
2640
2641         /* Set ring buffer size */
2642         ring = &adev->gfx.gfx_ring[0];
2643         rb_bufsz = order_base_2(ring->ring_size / 8);
2644         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2645         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
2646 #ifdef __BIG_ENDIAN
2647         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
2648 #endif
2649         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2650
2651         /* Initialize the ring buffer's write pointers */
2652         ring->wptr = 0;
2653         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2654         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
2655
2656         /* set the wb address whether it's enabled or not */
2657         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2658         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2659         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
2660
2661         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2662         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
2663         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
2664
2665         mdelay(1);
2666         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2667
2668         rb_addr = ring->gpu_addr >> 8;
2669         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
2670         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2671
2672         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
2673         if (ring->use_doorbell) {
2674                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2675                                     DOORBELL_OFFSET, ring->doorbell_index);
2676                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2677                                     DOORBELL_EN, 1);
2678         } else {
2679                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
2680         }
2681         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
2682
2683         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
2684                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
2685         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
2686
2687         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
2688                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
2689
2690
2691         /* start the ring */
2692         gfx_v9_0_cp_gfx_start(adev);
2693         ring->sched.ready = true;
2694
2695         return 0;
2696 }
2697
2698 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2699 {
2700         int i;
2701
2702         if (enable) {
2703                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
2704         } else {
2705                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
2706                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2707                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2708                         adev->gfx.compute_ring[i].sched.ready = false;
2709                 adev->gfx.kiq.ring.sched.ready = false;
2710         }
2711         udelay(50);
2712 }
2713
2714 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
2715 {
2716         const struct gfx_firmware_header_v1_0 *mec_hdr;
2717         const __le32 *fw_data;
2718         unsigned i;
2719         u32 tmp;
2720
2721         if (!adev->gfx.mec_fw)
2722                 return -EINVAL;
2723
2724         gfx_v9_0_cp_compute_enable(adev, false);
2725
2726         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2727         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2728
2729         fw_data = (const __le32 *)
2730                 (adev->gfx.mec_fw->data +
2731                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2732         tmp = 0;
2733         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2734         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2735         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
2736
2737         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
2738                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
2739         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
2740                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
2741
2742         /* MEC1 */
2743         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2744                          mec_hdr->jt_offset);
2745         for (i = 0; i < mec_hdr->jt_size; i++)
2746                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
2747                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
2748
2749         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2750                         adev->gfx.mec_fw_version);
2751         /* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
2752
2753         return 0;
2754 }
2755
2756 /* KIQ functions */
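/*
 * Point the RLC scheduler at the KIQ: encode me/pipe/queue into
 * mmRLC_CP_SCHEDULERS, then set the "queue is KIQ" bit (0x80).
 */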
2757 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
2758 {
2759         uint32_t tmp;
2760         struct amdgpu_device *adev = ring->adev;
2761
2762         /* tell the RLC which queue is the KIQ */
2763         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
2764         tmp &= 0xffffff00;
2765         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
2766         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2767         tmp |= 0x80;
2768         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2769 }
2770
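/*
 * Map all compute queues through the KIQ: one SET_RESOURCES packet with
 * the queue mask, followed by a MAP_QUEUES packet per KCQ carrying its
 * doorbell, MQD address and wptr write-back address. A KIQ ring test at
 * the end confirms the packets were consumed.
 */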
2771 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
2772 {
2773         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
2774         uint64_t queue_mask = 0;
2775         int r, i;
2776
2777         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
2778                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
2779                         continue;
2780
2781                 /* This situation may be hit in the future if a new HW
2782                  * generation exposes more than 64 queues. If so, the
2783                  * definition of queue_mask needs updating */
2784                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
2785                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
2786                         break;
2787                 }
2788
2789                 queue_mask |= (1ull << i);
2790         }
2791
2792         r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
2793         if (r) {
2794                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
2795                 return r;
2796         }
2797
2798         /* set resources */
2799         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
2800         amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
2801                           PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
2802         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
2803         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
2804         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
2805         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
2806         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
2807         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
2808         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2809                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2810                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
2811                 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2812
2813                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
2814                 /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
2815                 amdgpu_ring_write(kiq_ring,
2816                                   PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
2817                                   PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
2818                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
2819                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
2820                                   PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
2821                                   PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
2822                                   PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
2823                                   PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
2824                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
2825                 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
2826                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
2827                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
2828                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
2829                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
2830         }
2831
2832         r = amdgpu_ring_test_helper(kiq_ring);
2833         if (r)
2834                 DRM_ERROR("KCQ enable failed\n");
2835
2836         return r;
2837 }
2838
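/*
 * Fill the v9 MQD (memory queue descriptor) for a compute queue from the
 * ring state: EOP buffer, doorbell control, MQD/HQD base addresses, PQ
 * control and the rptr/wptr write-back addresses. The HQD registers are
 * later programmed from this structure.
 */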
2839 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
2840 {
2841         struct amdgpu_device *adev = ring->adev;
2842         struct v9_mqd *mqd = ring->mqd_ptr;
2843         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
2844         uint32_t tmp;
2845
2846         mqd->header = 0xC0310800;
2847         mqd->compute_pipelinestat_enable = 0x00000001;
2848         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2849         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2850         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2851         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2852         mqd->compute_misc_reserved = 0x00000003;
2853
2854         mqd->dynamic_cu_mask_addr_lo =
2855                 lower_32_bits(ring->mqd_gpu_addr
2856                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2857         mqd->dynamic_cu_mask_addr_hi =
2858                 upper_32_bits(ring->mqd_gpu_addr
2859                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2860
2861         eop_base_addr = ring->eop_gpu_addr >> 8;
2862         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
2863         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
2864
2865         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2866         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
2867         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
2868                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
2869
2870         mqd->cp_hqd_eop_control = tmp;
2871
2872         /* enable doorbell? */
2873         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2874
2875         if (ring->use_doorbell) {
2876                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2877                                     DOORBELL_OFFSET, ring->doorbell_index);
2878                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2879                                     DOORBELL_EN, 1);
2880                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2881                                     DOORBELL_SOURCE, 0);
2882                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2883                                     DOORBELL_HIT, 0);
2884         } else {
2885                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2886                                          DOORBELL_EN, 0);
2887         }
2888
2889         mqd->cp_hqd_pq_doorbell_control = tmp;
2890
2891         /* disable the queue if it's active */
2892         ring->wptr = 0;
2893         mqd->cp_hqd_dequeue_request = 0;
2894         mqd->cp_hqd_pq_rptr = 0;
2895         mqd->cp_hqd_pq_wptr_lo = 0;
2896         mqd->cp_hqd_pq_wptr_hi = 0;
2897
2898         /* set the pointer to the MQD */
2899         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
2900         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
2901
2902         /* set MQD vmid to 0 */
2903         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
2904         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
2905         mqd->cp_mqd_control = tmp;
2906
2907         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
2908         hqd_gpu_addr = ring->gpu_addr >> 8;
2909         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
2910         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2911
2912         /* set up the HQD, this is similar to CP_RB0_CNTL */
2913         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
2914         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
2915                             (order_base_2(ring->ring_size / 4) - 1));
2916         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
2917                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
2918 #ifdef __BIG_ENDIAN
2919         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
2920 #endif
2921         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
2922         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
2923         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
2924         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
2925         mqd->cp_hqd_pq_control = tmp;
2926
2927         /* set the wb address whether it's enabled or not */
2928         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2929         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
2930         mqd->cp_hqd_pq_rptr_report_addr_hi =
2931                 upper_32_bits(wb_gpu_addr) & 0xffff;
2932
2933         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2934         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2935         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2936         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2937
2938         tmp = 0;
2939         /* enable the doorbell if requested */
2940         if (ring->use_doorbell) {
2941                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2942                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2943                                 DOORBELL_OFFSET, ring->doorbell_index);
2944
2945                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2946                                          DOORBELL_EN, 1);
2947                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2948                                          DOORBELL_SOURCE, 0);
2949                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2950                                          DOORBELL_HIT, 0);
2951         }
2952
2953         mqd->cp_hqd_pq_doorbell_control = tmp;
2954
2955         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2956         ring->wptr = 0;
2957         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
2958
2959         /* set the vmid for the queue */
2960         mqd->cp_hqd_vmid = 0;
2961
2962         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
2963         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
2964         mqd->cp_hqd_persistent_state = tmp;
2965
2966         /* set MIN_IB_AVAIL_SIZE */
2967         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
2968         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
2969         mqd->cp_hqd_ib_control = tmp;
2970
2971         /* activate the queue */
2972         mqd->cp_hqd_active = 1;
2973
2974         return 0;
2975 }
2976
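/*
 * Program the KIQ's hardware queue (HQD) registers directly from its MQD.
 * Callers select the right me/pipe/queue via soc15_grbm_select() under
 * adev->srbm_mutex before invoking this.
 */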
2977 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
2978 {
2979         struct amdgpu_device *adev = ring->adev;
2980         struct v9_mqd *mqd = ring->mqd_ptr;
2981         int j;
2982
2983         /* disable wptr polling */
2984         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
2985
2986         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
2987                mqd->cp_hqd_eop_base_addr_lo);
2988         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
2989                mqd->cp_hqd_eop_base_addr_hi);
2990
2991         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2992         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
2993                mqd->cp_hqd_eop_control);
2994
2995         /* enable doorbell? */
2996         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
2997                mqd->cp_hqd_pq_doorbell_control);
2998
2999         /* disable the queue if it's active */
3000         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3001                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3002                 for (j = 0; j < adev->usec_timeout; j++) {
3003                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3004                                 break;
3005                         udelay(1);
3006                 }
3007                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3008                        mqd->cp_hqd_dequeue_request);
3009                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3010                        mqd->cp_hqd_pq_rptr);
3011                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3012                        mqd->cp_hqd_pq_wptr_lo);
3013                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3014                        mqd->cp_hqd_pq_wptr_hi);
3015         }
3016
3017         /* set the pointer to the MQD */
3018         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3019                mqd->cp_mqd_base_addr_lo);
3020         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3021                mqd->cp_mqd_base_addr_hi);
3022
3023         /* set MQD vmid to 0 */
3024         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3025                mqd->cp_mqd_control);
3026
3027         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3028         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3029                mqd->cp_hqd_pq_base_lo);
3030         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3031                mqd->cp_hqd_pq_base_hi);
3032
3033         /* set up the HQD, this is similar to CP_RB0_CNTL */
3034         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3035                mqd->cp_hqd_pq_control);
3036
3037         /* set the wb address whether it's enabled or not */
3038         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3039                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3040         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3041                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3042
3043         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3044         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3045                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3046         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3047                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3048
3049         /* enable the doorbell if requested */
3050         if (ring->use_doorbell) {
3051                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3052                                         (adev->doorbell_index.kiq * 2) << 2);
3053                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3054                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3055         }
3056
3057         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3058                mqd->cp_hqd_pq_doorbell_control);
3059
3060         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3061         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3062                mqd->cp_hqd_pq_wptr_lo);
3063         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3064                mqd->cp_hqd_pq_wptr_hi);
3065
3066         /* set the vmid for the queue */
3067         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3068
3069         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3070                mqd->cp_hqd_persistent_state);
3071
3072         /* activate the queue */
3073         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3074                mqd->cp_hqd_active);
3075
3076         if (ring->use_doorbell)
3077                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3078
3079         return 0;
3080 }
3081
3082 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3083 {
3084         struct amdgpu_device *adev = ring->adev;
3085         int j;
3086
3087         /* disable the queue if it's active */
3088         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3089
3090                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3091
3092                 for (j = 0; j < adev->usec_timeout; j++) {
3093                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3094                                 break;
3095                         udelay(1);
3096                 }
3097
3098                 if (j == adev->usec_timeout) {
3099                         DRM_DEBUG("KIQ dequeue request failed.\n");
3100
3101                         /* Manual disable if dequeue request times out */
3102                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3103                 }
3104
3105                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3106                       0);
3107         }
3108
3109         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3110         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3111         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3112         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3113         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3114         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3115         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3116         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3117
3118         return 0;
3119 }
3120
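/*
 * Initialize the KIQ. After a GPU reset the saved MQD backup is restored
 * and only the HQD registers are reprogrammed; on a fresh init a new MQD
 * is built, the registers are programmed and a backup copy is saved.
 */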
3121 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3122 {
3123         struct amdgpu_device *adev = ring->adev;
3124         struct v9_mqd *mqd = ring->mqd_ptr;
3125         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3126
3127         gfx_v9_0_kiq_setting(ring);
3128
3129         if (adev->in_gpu_reset) { /* for GPU_RESET case */
3130                 /* reset MQD to a clean status */
3131                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3132                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3133
3134                 /* reset ring buffer */
3135                 ring->wptr = 0;
3136                 amdgpu_ring_clear_ring(ring);
3137
3138                 mutex_lock(&adev->srbm_mutex);
3139                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3140                 gfx_v9_0_kiq_init_register(ring);
3141                 soc15_grbm_select(adev, 0, 0, 0, 0);
3142                 mutex_unlock(&adev->srbm_mutex);
3143         } else {
3144                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3145                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3146                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3147                 mutex_lock(&adev->srbm_mutex);
3148                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3149                 gfx_v9_0_mqd_init(ring);
3150                 gfx_v9_0_kiq_init_register(ring);
3151                 soc15_grbm_select(adev, 0, 0, 0, 0);
3152                 mutex_unlock(&adev->srbm_mutex);
3153
3154                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3155                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3156         }
3157
3158         return 0;
3159 }
3160
3161 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3162 {
3163         struct amdgpu_device *adev = ring->adev;
3164         struct v9_mqd *mqd = ring->mqd_ptr;
3165         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3166
3167         if (!adev->in_gpu_reset && !adev->in_suspend) {
3168                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3169                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3170                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3171                 mutex_lock(&adev->srbm_mutex);
3172                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3173                 gfx_v9_0_mqd_init(ring);
3174                 soc15_grbm_select(adev, 0, 0, 0, 0);
3175                 mutex_unlock(&adev->srbm_mutex);
3176
3177                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3178                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3179         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3180                 /* reset MQD to a clean status */
3181                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3182                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3183
3184                 /* reset ring buffer */
3185                 ring->wptr = 0;
3186                 amdgpu_ring_clear_ring(ring);
3187         } else {
3188                 amdgpu_ring_clear_ring(ring);
3189         }
3190
3191         return 0;
3192 }
3193
3194 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3195 {
3196         struct amdgpu_ring *ring;
3197         int r;
3198
3199         ring = &adev->gfx.kiq.ring;
3200
3201         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3202         if (unlikely(r != 0))
3203                 return r;
3204
3205         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3206         if (unlikely(r != 0))
3207                 return r;
3208
3209         gfx_v9_0_kiq_init_queue(ring);
3210         amdgpu_bo_kunmap(ring->mqd_obj);
3211         ring->mqd_ptr = NULL;
3212         amdgpu_bo_unreserve(ring->mqd_obj);
3213         ring->sched.ready = true;
3214         return 0;
3215 }
3216
3217 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3218 {
3219         struct amdgpu_ring *ring = NULL;
3220         int r = 0, i;
3221
3222         gfx_v9_0_cp_compute_enable(adev, true);
3223
3224         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3225                 ring = &adev->gfx.compute_ring[i];
3226
3227                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3228                 if (unlikely(r != 0))
3229                         goto done;
3230                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3231                 if (!r) {
3232                         r = gfx_v9_0_kcq_init_queue(ring);
3233                         amdgpu_bo_kunmap(ring->mqd_obj);
3234                         ring->mqd_ptr = NULL;
3235                 }
3236                 amdgpu_bo_unreserve(ring->mqd_obj);
3237                 if (r)
3238                         goto done;
3239         }
3240
3241         r = gfx_v9_0_kiq_kcq_enable(adev);
3242 done:
3243         return r;
3244 }
3245
3246 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3247 {
3248         int r, i;
3249         struct amdgpu_ring *ring;
3250
3251         if (!(adev->flags & AMD_IS_APU))
3252                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3253
3254         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3255                 /* legacy firmware loading */
3256                 r = gfx_v9_0_cp_gfx_load_microcode(adev);
3257                 if (r)
3258                         return r;
3259
3260                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3261                 if (r)
3262                         return r;
3263         }
3264
3265         r = gfx_v9_0_kiq_resume(adev);
3266         if (r)
3267                 return r;
3268
3269         r = gfx_v9_0_cp_gfx_resume(adev);
3270         if (r)
3271                 return r;
3272
3273         r = gfx_v9_0_kcq_resume(adev);
3274         if (r)
3275                 return r;
3276
3277         ring = &adev->gfx.gfx_ring[0];
3278         r = amdgpu_ring_test_helper(ring);
3279         if (r)
3280                 return r;
3281
3282         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3283                 ring = &adev->gfx.compute_ring[i];
3284                 amdgpu_ring_test_helper(ring);
3285         }
3286
3287         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3288
3289         return 0;
3290 }
3291
3292 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3293 {
3294         gfx_v9_0_cp_gfx_enable(adev, enable);
3295         gfx_v9_0_cp_compute_enable(adev, enable);
3296 }
3297
3298 static int gfx_v9_0_hw_init(void *handle)
3299 {
3300         int r;
3301         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3302
3303         gfx_v9_0_init_golden_registers(adev);
3304
3305         gfx_v9_0_constants_init(adev);
3306
3307         r = gfx_v9_0_csb_vram_pin(adev);
3308         if (r)
3309                 return r;
3310
3311         r = adev->gfx.rlc.funcs->resume(adev);
3312         if (r)
3313                 return r;
3314
3315         r = gfx_v9_0_cp_resume(adev);
3316         if (r)
3317                 return r;
3318
3319         r = gfx_v9_0_ngg_en(adev);
3320         if (r)
3321                 return r;
3322
3323         return r;
3324 }
3325
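/*
 * Unmap all compute queues by submitting one UNMAP_QUEUES packet (with the
 * RESET_QUEUES action) per KCQ on the KIQ ring, then ring-test the KIQ to
 * make sure the unmaps completed.
 */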
3326 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3327 {
3328         int r, i;
3329         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3330
3331         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3332         if (r)
3333                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3334
3335         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3336                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3337
3338                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3339                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3340                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3341                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3342                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3343                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3344                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3345                 amdgpu_ring_write(kiq_ring, 0);
3346                 amdgpu_ring_write(kiq_ring, 0);
3347                 amdgpu_ring_write(kiq_ring, 0);
3348         }
3349         r = amdgpu_ring_test_helper(kiq_ring);
3350         if (r)
3351                 DRM_ERROR("KCQ disable failed\n");
3352
3353         return r;
3354 }
3355
3356 static int gfx_v9_0_hw_fini(void *handle)
3357 {
3358         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3359
3360         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3361         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3362         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3363
3364         /* disable KCQ so the CPC doesn't touch memory that is no longer valid */
3365         gfx_v9_0_kcq_disable(adev);
3366
3367         if (amdgpu_sriov_vf(adev)) {
3368                 gfx_v9_0_cp_gfx_enable(adev, false);
3369                 /* must disable polling for SRIOV once the hw is finished, otherwise
3370                  * the CPC engine may keep fetching the WB address, which is no
3371                  * longer valid after the sw side has finished, and trigger DMAR
3372                  * read errors on the hypervisor side.
3373                  */
3374                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3375                 return 0;
3376         }
3377
3378         /* Use the deinitialize sequence from CAIL when unbinding the device
3379          * from the driver, otherwise the KIQ hangs when binding back.
3380          */
3381         if (!adev->in_gpu_reset && !adev->in_suspend) {
3382                 mutex_lock(&adev->srbm_mutex);
3383                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3384                                 adev->gfx.kiq.ring.pipe,
3385                                 adev->gfx.kiq.ring.queue, 0);
3386                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3387                 soc15_grbm_select(adev, 0, 0, 0, 0);
3388                 mutex_unlock(&adev->srbm_mutex);
3389         }
3390
3391         gfx_v9_0_cp_enable(adev, false);
3392         adev->gfx.rlc.funcs->stop(adev);
3393
3394         gfx_v9_0_csb_vram_unpin(adev);
3395
3396         return 0;
3397 }
3398
3399 static int gfx_v9_0_suspend(void *handle)
3400 {
3401         return gfx_v9_0_hw_fini(handle);
3402 }
3403
3404 static int gfx_v9_0_resume(void *handle)
3405 {
3406         return gfx_v9_0_hw_init(handle);
3407 }
3408
3409 static bool gfx_v9_0_is_idle(void *handle)
3410 {
3411         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3412
3413         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3414                                 GRBM_STATUS, GUI_ACTIVE))
3415                 return false;
3416         else
3417                 return true;
3418 }
3419
3420 static int gfx_v9_0_wait_for_idle(void *handle)
3421 {
3422         unsigned i;
3423         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3424
3425         for (i = 0; i < adev->usec_timeout; i++) {
3426                 if (gfx_v9_0_is_idle(handle))
3427                         return 0;
3428                 udelay(1);
3429         }
3430         return -ETIMEDOUT;
3431 }
3432
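/*
 * Build a GRBM soft-reset mask from the busy bits in GRBM_STATUS and
 * GRBM_STATUS2; if anything is busy, halt the RLC, gfx CP and compute CP,
 * then pulse the collected bits in GRBM_SOFT_RESET.
 */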
3433 static int gfx_v9_0_soft_reset(void *handle)
3434 {
3435         u32 grbm_soft_reset = 0;
3436         u32 tmp;
3437         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3438
3439         /* GRBM_STATUS */
3440         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3441         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3442                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3443                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3444                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3445                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3446                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3447                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3448                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3449                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3450                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3451         }
3452
3453         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3454                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3455                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3456         }
3457
3458         /* GRBM_STATUS2 */
3459         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3460         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3461                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3462                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3463
3464
3465         if (grbm_soft_reset) {
3466                 /* stop the rlc */
3467                 adev->gfx.rlc.funcs->stop(adev);
3468
3469                 /* Disable GFX parsing/prefetching */
3470                 gfx_v9_0_cp_gfx_enable(adev, false);
3471
3472                 /* Disable MEC parsing/prefetching */
3473                 gfx_v9_0_cp_compute_enable(adev, false);
3474
3475                 if (grbm_soft_reset) {
3476                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3477                         tmp |= grbm_soft_reset;
3478                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3479                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3480                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3481
3482                         udelay(50);
3483
3484                         tmp &= ~grbm_soft_reset;
3485                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3486                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3487                 }
3488
3489                 /* Wait a little for things to settle down */
3490                 udelay(50);
3491         }
3492         return 0;
3493 }
3494
3495 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3496 {
3497         uint64_t clock;
3498
3499         mutex_lock(&adev->gfx.gpu_clock_mutex);
3500         WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3501         clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3502                 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3503         mutex_unlock(&adev->gfx.gpu_clock_mutex);
3504         return clock;
3505 }
3506
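/*
 * Program the per-VMID GDS base/size, GWS and OA allocations for the given
 * vmid via gfx_v9_0_write_data_to_reg() as part of a GDS switch.
 */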
3507 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3508                                           uint32_t vmid,
3509                                           uint32_t gds_base, uint32_t gds_size,
3510                                           uint32_t gws_base, uint32_t gws_size,
3511                                           uint32_t oa_base, uint32_t oa_size)
3512 {
3513         struct amdgpu_device *adev = ring->adev;
3514
3515         /* GDS Base */
3516         gfx_v9_0_write_data_to_reg(ring, 0, false,
3517                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3518                                    gds_base);
3519
3520         /* GDS Size */
3521         gfx_v9_0_write_data_to_reg(ring, 0, false,
3522                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3523                                    gds_size);
3524
3525         /* GWS */
3526         gfx_v9_0_write_data_to_reg(ring, 0, false,
3527                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3528                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3529
3530         /* OA */
3531         gfx_v9_0_write_data_to_reg(ring, 0, false,
3532                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3533                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
3534 }
3535
3536 static const u32 vgpr_init_compute_shader[] =
3537 {
3538         0xb07c0000, 0xbe8000ff,
3539         0x000000f8, 0xbf110800,
3540         0x7e000280, 0x7e020280,
3541         0x7e040280, 0x7e060280,
3542         0x7e080280, 0x7e0a0280,
3543         0x7e0c0280, 0x7e0e0280,
3544         0x80808800, 0xbe803200,
3545         0xbf84fff5, 0xbf9c0000,
3546         0xd28c0001, 0x0001007f,
3547         0xd28d0001, 0x0002027e,
3548         0x10020288, 0xb8810904,
3549         0xb7814000, 0xd1196a01,
3550         0x00000301, 0xbe800087,
3551         0xbefc00c1, 0xd89c4000,
3552         0x00020201, 0xd89cc080,
3553         0x00040401, 0x320202ff,
3554         0x00000800, 0x80808100,
3555         0xbf84fff8, 0x7e020280,
3556         0xbf810000, 0x00000000,
3557 };
3558
3559 static const u32 sgpr_init_compute_shader[] =
3560 {
3561         0xb07c0000, 0xbe8000ff,
3562         0x0000005f, 0xbee50080,
3563         0xbe812c65, 0xbe822c65,
3564         0xbe832c65, 0xbe842c65,
3565         0xbe852c65, 0xb77c0005,
3566         0x80808500, 0xbf84fff8,
3567         0xbe800080, 0xbf810000,
3568 };
3569
3570 static const struct soc15_reg_entry vgpr_init_regs[] = {
3571    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3572    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3573    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3574    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3575    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3576    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3577    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3578    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3579    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
3580    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
3581 };
3582
3583 static const struct soc15_reg_entry sgpr_init_regs[] = {
3584    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3585    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3586    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3587    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3588    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3589    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3590    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3591    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3592    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
3593    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
3594 };
3595
3596 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
3597    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
3598    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
3599    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
3600    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
3601    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
3602    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
3603    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
3604    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
3605    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
3606    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
3607    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
3608    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
3609    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
3610    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
3611    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
3612    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
3613    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
3614    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
3615    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
3616    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
3617    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
3618    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
3619    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
3620    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
3621    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
3622    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
3623    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
3624    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
3625    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
3626    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
3627    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
3628    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
3629 };
3630
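/*
 * GDS EDC workaround: issue a DMA_DATA packet on the first compute ring
 * that fills the whole VMID0 GDS partition with zeroes, then busy-wait for
 * the ring to drain before restoring GDS_VMID0_SIZE to 0.
 */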
3631 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
3632 {
3633         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3634         int i, r;
3635
3636         r = amdgpu_ring_alloc(ring, 7);
3637         if (r) {
3638                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
3639                         ring->name, r);
3640                 return r;
3641         }
3642
3643         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
3644         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
3645
3646         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3647         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
3648                                 PACKET3_DMA_DATA_DST_SEL(1) |
3649                                 PACKET3_DMA_DATA_SRC_SEL(2) |
3650                                 PACKET3_DMA_DATA_ENGINE(0)));
3651         amdgpu_ring_write(ring, 0);
3652         amdgpu_ring_write(ring, 0);
3653         amdgpu_ring_write(ring, 0);
3654         amdgpu_ring_write(ring, 0);
3655         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
3656                                 adev->gds.gds_size);
3657
3658         amdgpu_ring_commit(ring);
3659
3660         for (i = 0; i < adev->usec_timeout; i++) {
3661                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
3662                         break;
3663                 udelay(1);
3664         }
3665
3666         if (i >= adev->usec_timeout)
3667                 r = -ETIMEDOUT;
3668
3669         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
3670
3671         return r;
3672 }
3673
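/*
 * GPR EDC workaround (only when GFX RAS is enabled): build an IB that
 * dispatches the small VGPR- and SGPR-init compute shaders above so the
 * register files are written with known values.
 */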
3674 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
3675 {
3676         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3677         struct amdgpu_ib ib;
3678         struct dma_fence *f = NULL;
3679         int r, i, j, k;
3680         unsigned total_size, vgpr_offset, sgpr_offset;
3681         u64 gpu_addr;
3682
3683         /* only support when RAS is enabled */
3684         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
3685                 return 0;
3686
3687         /* bail if the compute ring is not ready */
3688         if (!ring->sched.ready)
3689                 return 0;
3690
3691         total_size =
3692                 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3693         total_size +=
3694                 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3695         total_size = ALIGN(total_size, 256);
3696         vgpr_offset = total_size;
3697         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
3698         sgpr_offset = total_size;
3699         total_size += sizeof(sgpr_init_compute_shader);
3700
3701         /* allocate an indirect buffer to put the commands in */
3702         memset(&ib, 0, sizeof(ib));
3703         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
3704         if (r) {
3705                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
3706                 return r;
3707         }
3708
3709         /* load the compute shaders */
3710         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
3711                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
3712
3713         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
3714                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
3715
3716         /* init the ib length to 0 */
3717         ib.length_dw = 0;
3718
3719         /* VGPR */
3720         /* write the register state for the compute dispatch */
3721         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
3722                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3723                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
3724                                                                 - PACKET3_SET_SH_REG_START;
3725                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
3726         }
3727         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3728         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
3729         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3730         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3731                                                         - PACKET3_SET_SH_REG_START;
3732         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3733         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3734
3735         /* write dispatch packet */
3736         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3737         ib.ptr[ib.length_dw++] = 128; /* x */
3738         ib.ptr[ib.length_dw++] = 1; /* y */
3739         ib.ptr[ib.length_dw++] = 1; /* z */
3740         ib.ptr[ib.length_dw++] =
3741                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3742
3743         /* write CS partial flush packet */
3744         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3745         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3746
3747         /* SGPR */
3748         /* write the register state for the compute dispatch */
3749         for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
3750                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3751                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
3752                                                                 - PACKET3_SET_SH_REG_START;
3753                 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
3754         }
3755         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3756         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
3757         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3758         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3759                                                         - PACKET3_SET_SH_REG_START;
3760         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3761         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3762
3763         /* write dispatch packet */
3764         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3765         ib.ptr[ib.length_dw++] = 128; /* x */
3766         ib.ptr[ib.length_dw++] = 1; /* y */
3767         ib.ptr[ib.length_dw++] = 1; /* z */
3768         ib.ptr[ib.length_dw++] =
3769                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3770
3771         /* write CS partial flush packet */
3772         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3773         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3774
3775         /* schedule the ib on the ring */
3776         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
3777         if (r) {
3778                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
3779                 goto fail;
3780         }
3781
3782         /* wait for the GPU to finish processing the IB */
3783         r = dma_fence_wait(f, false);
3784         if (r) {
3785                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
3786                 goto fail;
3787         }
3788
3789         /* read back registers to clear the counters */
3790         mutex_lock(&adev->grbm_idx_mutex);
3791         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
3792                 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
3793                         for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
3794                                 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
3795                                 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3796                         }
3797                 }
3798         }
3799         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
3800         mutex_unlock(&adev->grbm_idx_mutex);
3801
3802 fail:
3803         amdgpu_ib_free(adev, &ib, NULL);
3804         dma_fence_put(f);
3805
3806         return r;
3807 }
3808
3809 static int gfx_v9_0_early_init(void *handle)
3810 {
3811         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3812
3813         adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
3814         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
3815         gfx_v9_0_set_ring_funcs(adev);
3816         gfx_v9_0_set_irq_funcs(adev);
3817         gfx_v9_0_set_gds_init(adev);
3818         gfx_v9_0_set_rlc_funcs(adev);
3819
3820         return 0;
3821 }
3822
3823 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
3824                 struct amdgpu_iv_entry *entry);
3825
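/*
 * ECC late init: run the GDS and GPR EDC workarounds, (re)enable the
 * GFX RAS feature, and on first initialization register the RAS
 * interrupt handler and the debugfs/sysfs error nodes before enabling
 * the CP ECC error interrupt.
 */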
3826 static int gfx_v9_0_ecc_late_init(void *handle)
3827 {
3828         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3829         struct ras_common_if **ras_if = &adev->gfx.ras_if;
3830         struct ras_ih_if ih_info = {
3831                 .cb = gfx_v9_0_process_ras_data_cb,
3832         };
3833         struct ras_fs_if fs_info = {
3834                 .sysfs_name = "gfx_err_count",
3835                 .debugfs_name = "gfx_err_inject",
3836         };
3837         struct ras_common_if ras_block = {
3838                 .block = AMDGPU_RAS_BLOCK__GFX,
3839                 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
3840                 .sub_block_index = 0,
3841                 .name = "gfx",
3842         };
3843         int r;
3844
3845         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
3846                 amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
3847                 return 0;
3848         }
3849
3850         r = gfx_v9_0_do_edc_gds_workarounds(adev);
3851         if (r)
3852                 return r;
3853
3854         /* requires IBs so do in late init after IB pool is initialized */
3855         r = gfx_v9_0_do_edc_gpr_workarounds(adev);
3856         if (r)
3857                 return r;
3858
3859         /* handle resume path. */
3860         if (*ras_if) {
3861                 /* resend ras TA enable cmd during resume.
3862                  * prepare to handle failure.
3863                  */
3864                 ih_info.head = **ras_if;
3865                 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3866                 if (r) {
3867                         if (r == -EAGAIN) {
3868                                 /* request a gpu reset. will run again. */
3869                                 amdgpu_ras_request_reset_on_boot(adev,
3870                                                 AMDGPU_RAS_BLOCK__GFX);
3871                                 return 0;
3872                         }
3873                         /* failed to enable ras, clean up everything. */
3874                         goto irq;
3875                 }
3876                 /* enabled successfully, continue. */
3877                 goto resume;
3878         }
3879
3880         *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
3881         if (!*ras_if)
3882                 return -ENOMEM;
3883
3884         **ras_if = ras_block;
3885
3886         r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3887         if (r) {
3888                 if (r == -EAGAIN) {
3889                         amdgpu_ras_request_reset_on_boot(adev,
3890                                         AMDGPU_RAS_BLOCK__GFX);
3891                         r = 0;
3892                 }
3893                 goto feature;
3894         }
3895
3896         ih_info.head = **ras_if;
3897         fs_info.head = **ras_if;
3898
3899         r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
3900         if (r)
3901                 goto interrupt;
3902
3903         amdgpu_ras_debugfs_create(adev, &fs_info);
3904
3905         r = amdgpu_ras_sysfs_create(adev, &fs_info);
3906         if (r)
3907                 goto sysfs;
3908 resume:
3909         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
3910         if (r)
3911                 goto irq;
3912
3913         return 0;
3914 irq:
3915         amdgpu_ras_sysfs_remove(adev, *ras_if);
3916 sysfs:
3917         amdgpu_ras_debugfs_remove(adev, *ras_if);
3918         amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
3919 interrupt:
3920         amdgpu_ras_feature_enable(adev, *ras_if, 0);
3921 feature:
3922         kfree(*ras_if);
3923         *ras_if = NULL;
3924         return r;
3925 }
3926
3927 static int gfx_v9_0_late_init(void *handle)
3928 {
3929         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3930         int r;
3931
3932         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
3933         if (r)
3934                 return r;
3935
3936         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
3937         if (r)
3938                 return r;
3939
3940         r = gfx_v9_0_ecc_late_init(handle);
3941         if (r)
3942                 return r;
3943
3944         return 0;
3945 }
3946
3947 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
3948 {
3949         uint32_t rlc_setting;
3950
3951         /* if RLC is not enabled, do nothing */
3952         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
3953         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
3954                 return false;
3955
3956         return true;
3957 }
3958
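/* Request RLC safe mode and poll RLC_SAFE_MODE until the CMD field clears. */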
3959 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
3960 {
3961         uint32_t data;
3962         unsigned i;
3963
3964         data = RLC_SAFE_MODE__CMD_MASK;
3965         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
3966         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3967
3968         /* wait for RLC_SAFE_MODE */
3969         for (i = 0; i < adev->usec_timeout; i++) {
3970                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
3971                         break;
3972                 udelay(1);
3973         }
3974 }
3975
3976 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
3977 {
3978         uint32_t data;
3979
3980         data = RLC_SAFE_MODE__CMD_MASK;
3981         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3982 }
3983
3984 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
3985                                                 bool enable)
3986 {
3987         amdgpu_gfx_rlc_enter_safe_mode(adev);
3988
3989         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
3990                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
3991                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
3992                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
3993         } else {
3994                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
3995                 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
3996         }
3997
3998         amdgpu_gfx_rlc_exit_safe_mode(adev);
3999 }
4000
4001 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4002                                                 bool enable)
4003 {
4004         /* TODO: double check if we need to perform under safe mode */
4005         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4006
4007         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4008                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4009         else
4010                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4011
4012         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4013                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4014         else
4015                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4016
4017         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4018 }
4019
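/*
 * Program medium grain clock gating (MGCG) and the related RLC/CP
 * memory light sleep (MGLS) controls under RLC safe mode; register
 * writes are skipped when the value is unchanged.
 */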
4020 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4021                                                       bool enable)
4022 {
4023         uint32_t data, def;
4024
4025         amdgpu_gfx_rlc_enter_safe_mode(adev);
4026
4027         /* It is disabled by HW by default */
4028         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4029                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4030                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4031
4032                 if (adev->asic_type != CHIP_VEGA12)
4033                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4034
4035                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4036                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4037                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4038
4039                 /* only for Vega10 & Raven1 */
4040                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4041
4042                 if (def != data)
4043                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4044
4045                 /* MGLS is a global flag to control all MGLS in GFX */
4046                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4047                         /* 2 - RLC memory Light sleep */
4048                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4049                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4050                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4051                                 if (def != data)
4052                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4053                         }
4054                         /* 3 - CP memory Light sleep */
4055                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4056                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4057                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4058                                 if (def != data)
4059                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4060                         }
4061                 }
4062         } else {
4063                 /* 1 - MGCG_OVERRIDE */
4064                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4065
4066                 if (adev->asic_type != CHIP_VEGA12)
4067                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4068
4069                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4070                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4071                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4072                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4073
4074                 if (def != data)
4075                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4076
4077                 /* 2 - disable MGLS in RLC */
4078                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4079                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4080                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4081                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4082                 }
4083
4084                 /* 3 - disable MGLS in CP */
4085                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4086                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4087                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4088                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4089                 }
4090         }
4091
4092         amdgpu_gfx_rlc_exit_safe_mode(adev);
4093 }
4094
4095 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4096                                            bool enable)
4097 {
4098         uint32_t data, def;
4099
4100         amdgpu_gfx_rlc_enter_safe_mode(adev);
4101
4102         /* Enable 3D CGCG/CGLS */
4103         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4104                 /* write cmd to clear cgcg/cgls ov */
4105                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4106                 /* unset CGCG override */
4107                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4108                 /* update CGCG and CGLS override bits */
4109                 if (def != data)
4110                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4111
4112                 /* enable 3Dcgcg FSM(0x0000363f) */
4113                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4114
4115                 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4116                         RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4117                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4118                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4119                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4120                 if (def != data)
4121                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4122
4123                 /* set IDLE_POLL_COUNT(0x00900100) */
4124                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4125                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4126                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4127                 if (def != data)
4128                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4129         } else {
4130                 /* Disable CGCG/CGLS */
4131                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4132                 /* disable cgcg, cgls should be disabled */
4133                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4134                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4135                 /* disable cgcg and cgls in FSM */
4136                 if (def != data)
4137                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4138         }
4139
4140         amdgpu_gfx_rlc_exit_safe_mode(adev);
4141 }
4142
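/*
 * Program coarse grain clock gating (CGCG/CGLS): adjust the CGCG/CGLS
 * overrides, then enable the CGCG FSM and idle poll count, or disable
 * both in the FSM, mirroring the 3D path above.
 */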
4143 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4144                                                       bool enable)
4145 {
4146         uint32_t def, data;
4147
4148         amdgpu_gfx_rlc_enter_safe_mode(adev);
4149
4150         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4151                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4152                 /* unset CGCG override */
4153                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4154                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4155                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4156                 else
4157                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4158                 /* update CGCG and CGLS override bits */
4159                 if (def != data)
4160                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4161
4162                 /* enable cgcg FSM(0x0000363F) */
4163                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4164
4165                 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4166                         RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4167                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4168                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4169                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4170                 if (def != data)
4171                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4172
4173                 /* set IDLE_POLL_COUNT(0x00900100) */
4174                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4175                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4176                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4177                 if (def != data)
4178                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4179         } else {
4180                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4181                 /* reset CGCG/CGLS bits */
4182                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4183                 /* disable cgcg and cgls in FSM */
4184                 if (def != data)
4185                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4186         }
4187
4188         amdgpu_gfx_rlc_exit_safe_mode(adev);
4189 }
4190
4191 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4192                                             bool enable)
4193 {
4194         if (enable) {
4195                 /* CGCG/CGLS should be enabled after MGCG/MGLS
4196                  * ===  MGCG + MGLS ===
4197                  */
4198                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4199                 /* ===  CGCG /CGLS for GFX 3D Only === */
4200                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4201                 /* ===  CGCG + CGLS === */
4202                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4203         } else {
4204                 /* CGCG/CGLS should be disabled before MGCG/MGLS
4205                  * ===  CGCG + CGLS ===
4206                  */
4207                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4208                 /* ===  CGCG /CGLS for GFX 3D Only === */
4209                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4210                 /* ===  MGCG + MGLS === */
4211                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4212         }
4213         return 0;
4214 }
4215
4216 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4217         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4218         .set_safe_mode = gfx_v9_0_set_safe_mode,
4219         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
4220         .init = gfx_v9_0_rlc_init,
4221         .get_csb_size = gfx_v9_0_get_csb_size,
4222         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4223         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4224         .resume = gfx_v9_0_rlc_resume,
4225         .stop = gfx_v9_0_rlc_stop,
4226         .reset = gfx_v9_0_rlc_reset,
4227         .start = gfx_v9_0_rlc_start
4228 };
4229
4230 static int gfx_v9_0_set_powergating_state(void *handle,
4231                                           enum amd_powergating_state state)
4232 {
4233         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4234         bool enable = (state == AMD_PG_STATE_GATE);
4235
4236         switch (adev->asic_type) {
4237         case CHIP_RAVEN:
4238                 if (!enable) {
4239                         amdgpu_gfx_off_ctrl(adev, false);
4240                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4241                 }
4242                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4243                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4244                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4245                 } else {
4246                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4247                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4248                 }
4249
4250                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4251                         gfx_v9_0_enable_cp_power_gating(adev, true);
4252                 else
4253                         gfx_v9_0_enable_cp_power_gating(adev, false);
4254
4255                 /* update gfx cgpg state */
4256                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4257
4258                 /* update mgcg state */
4259                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4260
4261                 if (enable)
4262                         amdgpu_gfx_off_ctrl(adev, true);
4263                 break;
4264         case CHIP_VEGA12:
4265                 if (!enable) {
4266                         amdgpu_gfx_off_ctrl(adev, false);
4267                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4268                 } else {
4269                         amdgpu_gfx_off_ctrl(adev, true);
4270                 }
4271                 break;
4272         default:
4273                 break;
4274         }
4275
4276         return 0;
4277 }
4278
4279 static int gfx_v9_0_set_clockgating_state(void *handle,
4280                                           enum amd_clockgating_state state)
4281 {
4282         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4283
4284         if (amdgpu_sriov_vf(adev))
4285                 return 0;
4286
4287         switch (adev->asic_type) {
4288         case CHIP_VEGA10:
4289         case CHIP_VEGA12:
4290         case CHIP_VEGA20:
4291         case CHIP_RAVEN:
4292                 gfx_v9_0_update_gfx_clock_gating(adev,
4293                                                  state == AMD_CG_STATE_GATE);
4294                 break;
4295         default:
4296                 break;
4297         }
4298         return 0;
4299 }
4300
4301 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4302 {
4303         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4304         int data;
4305
4306         if (amdgpu_sriov_vf(adev))
4307                 *flags = 0;
4308
4309         /* AMD_CG_SUPPORT_GFX_MGCG */
4310         data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4311         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4312                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4313
4314         /* AMD_CG_SUPPORT_GFX_CGCG */
4315         data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4316         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4317                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4318
4319         /* AMD_CG_SUPPORT_GFX_CGLS */
4320         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4321                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4322
4323         /* AMD_CG_SUPPORT_GFX_RLC_LS */
4324         data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4325         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4326                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4327
4328         /* AMD_CG_SUPPORT_GFX_CP_LS */
4329         data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4330         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4331                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4332
4333         /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4334         data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4335         if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4336                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4337
4338         /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4339         if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4340                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4341 }
4342
4343 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4344 {
4345         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4346 }
4347
4348 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4349 {
4350         struct amdgpu_device *adev = ring->adev;
4351         u64 wptr;
4352
4353         /* XXX check if swapping is necessary on BE */
4354         if (ring->use_doorbell) {
4355                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4356         } else {
4357                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4358                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4359         }
4360
4361         return wptr;
4362 }
4363
4364 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4365 {
4366         struct amdgpu_device *adev = ring->adev;
4367
4368         if (ring->use_doorbell) {
4369                 /* XXX check if swapping is necessary on BE */
4370                 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4371                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4372         } else {
4373                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4374                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4375         }
4376 }
4377
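/*
 * Emit an HDP flush: pick the NBIO ref/mask bit for this ring's CP
 * engine (MEC pipe for compute, PFP for gfx) and wait on the HDP
 * flush request/done registers with that mask.
 */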
4378 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4379 {
4380         struct amdgpu_device *adev = ring->adev;
4381         u32 ref_and_mask, reg_mem_engine;
4382         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4383
4384         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4385                 switch (ring->me) {
4386                 case 1:
4387                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4388                         break;
4389                 case 2:
4390                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4391                         break;
4392                 default:
4393                         return;
4394                 }
4395                 reg_mem_engine = 0;
4396         } else {
4397                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4398                 reg_mem_engine = 1; /* pfp */
4399         }
4400
4401         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4402                               adev->nbio_funcs->get_hdp_flush_req_offset(adev),
4403                               adev->nbio_funcs->get_hdp_flush_done_offset(adev),
4404                               ref_and_mask, ref_and_mask, 0x20);
4405 }
4406
4407 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4408                                         struct amdgpu_job *job,
4409                                         struct amdgpu_ib *ib,
4410                                         uint32_t flags)
4411 {
4412         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4413         u32 header, control = 0;
4414
4415         if (ib->flags & AMDGPU_IB_FLAG_CE)
4416                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4417         else
4418                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4419
4420         control |= ib->length_dw | (vmid << 24);
4421
4422         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4423                 control |= INDIRECT_BUFFER_PRE_ENB(1);
4424
4425                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4426                         gfx_v9_0_ring_emit_de_meta(ring);
4427         }
4428
4429         amdgpu_ring_write(ring, header);
4430         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4431         amdgpu_ring_write(ring,
4432 #ifdef __BIG_ENDIAN
4433                 (2 << 0) |
4434 #endif
4435                 lower_32_bits(ib->gpu_addr));
4436         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4437         amdgpu_ring_write(ring, control);
4438 }
4439
4440 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4441                                           struct amdgpu_job *job,
4442                                           struct amdgpu_ib *ib,
4443                                           uint32_t flags)
4444 {
4445         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4446         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4447
4448         /* Currently, there is a high possibility to get wave ID mismatch
4449          * between ME and GDS, leading to a hw deadlock, because ME generates
4450          * different wave IDs than the GDS expects. This situation happens
4451          * randomly when at least 5 compute pipes use GDS ordered append.
4452          * The wave IDs generated by ME are also wrong after suspend/resume.
4453          * Those are probably bugs somewhere else in the kernel driver.
4454          *
4455          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4456          * GDS to 0 for this ring (me/pipe).
4457          */
4458         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4459                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4460                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4461                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4462         }
4463
4464         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4465         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4466         amdgpu_ring_write(ring,
4467 #ifdef __BIG_ENDIAN
4468                                 (2 << 0) |
4469 #endif
4470                                 lower_32_bits(ib->gpu_addr));
4471         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4472         amdgpu_ring_write(ring, control);
4473 }
4474
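/*
 * Emit a fence using RELEASE_MEM: flush the caches selected by 'flags'
 * at end-of-pipe, write the 32- or 64-bit sequence number to 'addr'
 * and optionally raise an interrupt.
 */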
4475 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4476                                      u64 seq, unsigned flags)
4477 {
4478         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4479         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4480         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4481
4482         /* RELEASE_MEM - flush caches, send int */
4483         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4484         amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4485                                                EOP_TC_NC_ACTION_EN) :
4486                                               (EOP_TCL1_ACTION_EN |
4487                                                EOP_TC_ACTION_EN |
4488                                                EOP_TC_WB_ACTION_EN |
4489                                                EOP_TC_MD_ACTION_EN)) |
4490                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4491                                  EVENT_INDEX(5)));
4492         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4493
4494         /*
4495          * the address should be Qword aligned for a 64bit write and Dword
4496          * aligned if we only send the low 32 bits of data (discard data high)
4497          */
4498         if (write64bit)
4499                 BUG_ON(addr & 0x7);
4500         else
4501                 BUG_ON(addr & 0x3);
4502         amdgpu_ring_write(ring, lower_32_bits(addr));
4503         amdgpu_ring_write(ring, upper_32_bits(addr));
4504         amdgpu_ring_write(ring, lower_32_bits(seq));
4505         amdgpu_ring_write(ring, upper_32_bits(seq));
4506         amdgpu_ring_write(ring, 0);
4507 }
4508
4509 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4510 {
4511         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4512         uint32_t seq = ring->fence_drv.sync_seq;
4513         uint64_t addr = ring->fence_drv.gpu_addr;
4514
4515         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
4516                               lower_32_bits(addr), upper_32_bits(addr),
4517                               seq, 0xffffffff, 4);
4518 }
4519
4520 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4521                                         unsigned vmid, uint64_t pd_addr)
4522 {
4523         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4524
4525         /* compute doesn't have PFP */
4526         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4527                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4528                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4529                 amdgpu_ring_write(ring, 0x0);
4530         }
4531 }
4532
4533 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4534 {
4535         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
4536 }
4537
4538 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4539 {
4540         u64 wptr;
4541
4542         /* XXX check if swapping is necessary on BE */
4543         if (ring->use_doorbell)
4544                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4545         else
4546                 BUG();
4547         return wptr;
4548 }
4549
4550 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
4551                                            bool acquire)
4552 {
4553         struct amdgpu_device *adev = ring->adev;
4554         int pipe_num, tmp, reg;
4555         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
4556
4557         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
4558
4559         /* first me only has 2 entries, GFX and HP3D */
4560         if (ring->me > 0)
4561                 pipe_num -= 2;
4562
4563         reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
4564         tmp = RREG32(reg);
4565         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
4566         WREG32(reg, tmp);
4567 }
4568
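/*
 * Track this ring's pipe in pipe_reserve_bitmap and rebalance
 * SPI_WCL_PIPE_PERCENT for all gfx/compute rings: with no reservations
 * every pipe gets the full budget, otherwise pipes without a
 * reservation are throttled down.
 */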
4569 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
4570                                             struct amdgpu_ring *ring,
4571                                             bool acquire)
4572 {
4573         int i, pipe;
4574         bool reserve;
4575         struct amdgpu_ring *iring;
4576
4577         mutex_lock(&adev->gfx.pipe_reserve_mutex);
4578         pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
4579         if (acquire)
4580                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4581         else
4582                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4583
4584         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
4585                 /* Clear all reservations - everyone reacquires all resources */
4586                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
4587                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
4588                                                        true);
4589
4590                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
4591                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
4592                                                        true);
4593         } else {
4594                 /* Lower all pipes without a current reservation */
4595                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
4596                         iring = &adev->gfx.gfx_ring[i];
4597                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4598                                                            iring->me,
4599                                                            iring->pipe,
4600                                                            0);
4601                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4602                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4603                 }
4604
4605                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
4606                         iring = &adev->gfx.compute_ring[i];
4607                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4608                                                            iring->me,
4609                                                            iring->pipe,
4610                                                            0);
4611                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4612                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4613                 }
4614         }
4615
4616         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
4617 }
4618
4619 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
4620                                       struct amdgpu_ring *ring,
4621                                       bool acquire)
4622 {
4623         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
4624         uint32_t queue_priority = acquire ? 0xf : 0x0;
4625
4626         mutex_lock(&adev->srbm_mutex);
4627         soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4628
4629         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
4630         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
4631
4632         soc15_grbm_select(adev, 0, 0, 0, 0);
4633         mutex_unlock(&adev->srbm_mutex);
4634 }
4635
4636 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
4637                                                enum drm_sched_priority priority)
4638 {
4639         struct amdgpu_device *adev = ring->adev;
4640         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
4641
4642         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
4643                 return;
4644
4645         gfx_v9_0_hqd_set_priority(adev, ring, acquire);
4646         gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
4647 }
4648
4649 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4650 {
4651         struct amdgpu_device *adev = ring->adev;
4652
4653         /* XXX check if swapping is necessary on BE */
4654         if (ring->use_doorbell) {
4655                 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4656                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4657         } else {
4658                 BUG(); /* only DOORBELL method supported on gfx9 now */
4659         }
4660 }
4661
4662 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
4663                                          u64 seq, unsigned int flags)
4664 {
4665         struct amdgpu_device *adev = ring->adev;
4666
4667         /* we only allocate 32bit for each seq wb address */
4668         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
4669
4670         /* write fence seq to the "addr" */
4671         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4672         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4673                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
4674         amdgpu_ring_write(ring, lower_32_bits(addr));
4675         amdgpu_ring_write(ring, upper_32_bits(addr));
4676         amdgpu_ring_write(ring, lower_32_bits(seq));
4677
4678         if (flags & AMDGPU_FENCE_FLAG_INT) {
4679                 /* set register to trigger INT */
4680                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4681                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4682                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
4683                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
4684                 amdgpu_ring_write(ring, 0);
4685                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
4686         }
4687 }
4688
4689 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
4690 {
4691         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4692         amdgpu_ring_write(ring, 0);
4693 }
4694
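/*
 * Write the CE metadata payload into the context save area (CSA) with
 * a WRITE_DATA packet; paired with gfx_v9_0_ring_emit_de_meta() below
 * and emitted from the SR-IOV paths in this file.
 */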
4695 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
4696 {
4697         struct v9_ce_ib_state ce_payload = {0};
4698         uint64_t csa_addr;
4699         int cnt;
4700
4701         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
4702         csa_addr = amdgpu_csa_vaddr(ring->adev);
4703
4704         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4705         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
4706                                  WRITE_DATA_DST_SEL(8) |
4707                                  WR_CONFIRM) |
4708                                  WRITE_DATA_CACHE_POLICY(0));
4709         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4710         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4711         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
4712 }
4713
4714 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
4715 {
4716         struct v9_de_ib_state de_payload = {0};
4717         uint64_t csa_addr, gds_addr;
4718         int cnt;
4719
4720         csa_addr = amdgpu_csa_vaddr(ring->adev);
4721         gds_addr = csa_addr + 4096;
4722         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
4723         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
4724
4725         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
4726         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4727         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
4728                                  WRITE_DATA_DST_SEL(8) |
4729                                  WR_CONFIRM) |
4730                                  WRITE_DATA_CACHE_POLICY(0));
4731         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4732         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4733         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
4734 }
4735
4736 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
4737 {
4738         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
4739         amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
4740 }
4741
4742 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
4743 {
4744         uint32_t dw2 = 0;
4745
4746         if (amdgpu_sriov_vf(ring->adev))
4747                 gfx_v9_0_ring_emit_ce_meta(ring);
4748
4749         gfx_v9_0_ring_emit_tmz(ring, true);
4750
4751         dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
4752         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
4753                 /* set load_global_config & load_global_uconfig */
4754                 dw2 |= 0x8001;
4755                 /* set load_cs_sh_regs */
4756                 dw2 |= 0x01000000;
4757                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
4758                 dw2 |= 0x10002;
4759
4760                 /* set load_ce_ram if a preamble is presented */
4761                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
4762                         dw2 |= 0x10000000;
4763         } else {
4764                 /* still load_ce_ram if this is the first time the preamble is
4765                  * presented, even though no context switch happens.
4766                  */
4767                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
4768                         dw2 |= 0x10000000;
4769         }
4770
4771         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4772         amdgpu_ring_write(ring, dw2);
4773         amdgpu_ring_write(ring, 0);
4774 }
4775
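/*
 * Conditional execution: emit a COND_EXEC packet whose DW count is a
 * placeholder (0x55aa55aa) and return its ring offset;
 * gfx_v9_0_ring_emit_patch_cond_exec() later overwrites it with the
 * number of DWs to skip when *cond_exe_gpu_addr reads back as 0.
 */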
4776 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
4777 {
4778         unsigned ret;
4779         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
4780         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
4781         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
4782         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
4783         ret = ring->wptr & ring->buf_mask;
4784         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
4785         return ret;
4786 }
4787
4788 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
4789 {
4790         unsigned cur;
4791         BUG_ON(offset > ring->buf_mask);
4792         BUG_ON(ring->ring[offset] != 0x55aa55aa);
4793
4794         cur = (ring->wptr & ring->buf_mask) - 1;
4795         if (likely(cur > offset))
4796                 ring->ring[offset] = cur - offset;
4797         else
4798                 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
4799 }
4800
4801 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
4802 {
4803         struct amdgpu_device *adev = ring->adev;
4804
4805         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4806         amdgpu_ring_write(ring, 0 |     /* src: register */
4807                                 (5 << 8) |      /* dst: memory */
4808                                 (1 << 20));     /* write confirm */
4809         amdgpu_ring_write(ring, reg);
4810         amdgpu_ring_write(ring, 0);
4811         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4812                                 adev->virt.reg_val_offs * 4));
4813         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4814                                 adev->virt.reg_val_offs * 4));
4815 }
4816
4817 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
4818                                     uint32_t val)
4819 {
4820         uint32_t cmd = 0;
4821
4822         switch (ring->funcs->type) {
4823         case AMDGPU_RING_TYPE_GFX:
4824                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
4825                 break;
4826         case AMDGPU_RING_TYPE_KIQ:
4827                 cmd = (1 << 16); /* no inc addr */
4828                 break;
4829         default:
4830                 cmd = WR_CONFIRM;
4831                 break;
4832         }
4833         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4834         amdgpu_ring_write(ring, cmd);
4835         amdgpu_ring_write(ring, reg);
4836         amdgpu_ring_write(ring, 0);
4837         amdgpu_ring_write(ring, val);
4838 }
4839
4840 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
4841                                         uint32_t val, uint32_t mask)
4842 {
4843         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
4844 }
4845
4846 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
4847                                                   uint32_t reg0, uint32_t reg1,
4848                                                   uint32_t ref, uint32_t mask)
4849 {
4850         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4851         struct amdgpu_device *adev = ring->adev;
4852         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
4853                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
4854
4855         if (fw_version_ok)
4856                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
4857                                       ref, mask, 0x20);
4858         else
4859                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
4860                                                            ref, mask);
4861 }
4862
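/*
 * Soft recovery: issue an SQ_CMD restricted to the given VMID
 * (CHECK_VMID/VM_ID) so the waves of a hung job can be targeted
 * without a full GPU reset.
 */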
4863 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
4864 {
4865         struct amdgpu_device *adev = ring->adev;
4866         uint32_t value = 0;
4867
4868         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
4869         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
4870         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
4871         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
4872         WREG32(mmSQ_CMD, value);
4873 }
4874
4875 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4876                                                  enum amdgpu_interrupt_state state)
4877 {
4878         switch (state) {
4879         case AMDGPU_IRQ_STATE_DISABLE:
4880         case AMDGPU_IRQ_STATE_ENABLE:
4881                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4882                                TIME_STAMP_INT_ENABLE,
4883                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4884                 break;
4885         default:
4886                 break;
4887         }
4888 }
4889
4890 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4891                                                      int me, int pipe,
4892                                                      enum amdgpu_interrupt_state state)
4893 {
4894         u32 mec_int_cntl, mec_int_cntl_reg;
4895
4896         /*
4897          * amdgpu controls only the first MEC. That's why this function only
4898          * handles the setting of interrupts for this specific MEC. All other
4899          * pipes' interrupts are set by amdkfd.
4900          */
4901
4902         if (me == 1) {
4903                 switch (pipe) {
4904                 case 0:
4905                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
4906                         break;
4907                 case 1:
4908                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
4909                         break;
4910                 case 2:
4911                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
4912                         break;
4913                 case 3:
4914                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
4915                         break;
4916                 default:
4917                         DRM_DEBUG("invalid pipe %d\n", pipe);
4918                         return;
4919                 }
4920         } else {
4921                 DRM_DEBUG("invalid me %d\n", me);
4922                 return;
4923         }
4924
4925         switch (state) {
4926         case AMDGPU_IRQ_STATE_DISABLE:
4927                 mec_int_cntl = RREG32(mec_int_cntl_reg);
4928                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4929                                              TIME_STAMP_INT_ENABLE, 0);
4930                 WREG32(mec_int_cntl_reg, mec_int_cntl);
4931                 break;
4932         case AMDGPU_IRQ_STATE_ENABLE:
4933                 mec_int_cntl = RREG32(mec_int_cntl_reg);
4934                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4935                                              TIME_STAMP_INT_ENABLE, 1);
4936                 WREG32(mec_int_cntl_reg, mec_int_cntl);
4937                 break;
4938         default:
4939                 break;
4940         }
4941 }
4942
4943 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4944                                              struct amdgpu_irq_src *source,
4945                                              unsigned type,
4946                                              enum amdgpu_interrupt_state state)
4947 {
4948         switch (state) {
4949         case AMDGPU_IRQ_STATE_DISABLE:
4950         case AMDGPU_IRQ_STATE_ENABLE:
4951                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4952                                PRIV_REG_INT_ENABLE,
4953                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4954                 break;
4955         default:
4956                 break;
4957         }
4958
4959         return 0;
4960 }
4961
4962 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4963                                               struct amdgpu_irq_src *source,
4964                                               unsigned type,
4965                                               enum amdgpu_interrupt_state state)
4966 {
4967         switch (state) {
4968         case AMDGPU_IRQ_STATE_DISABLE:
4969         case AMDGPU_IRQ_STATE_ENABLE:
4970                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4971                                PRIV_INSTR_INT_ENABLE,
4972                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
                break;
4973         default:
4974                 break;
4975         }
4976
4977         return 0;
4978 }
4979
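/*
 * Helpers to toggle CP_ECC_ERROR_INT_ENABLE in the per-pipe interrupt
 * control register (CP_ME<me>_PIPE<pipe>_INT_CNTL) of the given ME/pipe.
 */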
4980 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
4981         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4982                         CP_ECC_ERROR_INT_ENABLE, 1)
4983
4984 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
4985         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4986                         CP_ECC_ERROR_INT_ENABLE, 0)
4987
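/*
 * Enable/disable CP ECC error interrupts, both on the gfx ring
 * (CP_INT_CNTL_RING0) and on all four pipes of MEC1.
 */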
4988 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
4989                                               struct amdgpu_irq_src *source,
4990                                               unsigned type,
4991                                               enum amdgpu_interrupt_state state)
4992 {
4993         switch (state) {
4994         case AMDGPU_IRQ_STATE_DISABLE:
4995                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4996                                 CP_ECC_ERROR_INT_ENABLE, 0);
4997                 DISABLE_ECC_ON_ME_PIPE(1, 0);
4998                 DISABLE_ECC_ON_ME_PIPE(1, 1);
4999                 DISABLE_ECC_ON_ME_PIPE(1, 2);
5000                 DISABLE_ECC_ON_ME_PIPE(1, 3);
5001                 break;
5002
5003         case AMDGPU_IRQ_STATE_ENABLE:
5004                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5005                                 CP_ECC_ERROR_INT_ENABLE, 1);
5006                 ENABLE_ECC_ON_ME_PIPE(1, 0);
5007                 ENABLE_ECC_ON_ME_PIPE(1, 1);
5008                 ENABLE_ECC_ON_ME_PIPE(1, 2);
5009                 ENABLE_ECC_ON_ME_PIPE(1, 3);
5010                 break;
5011         default:
5012                 break;
5013         }
5014
5015         return 0;
5016 }
5017
5018
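/*
 * Dispatch an EOP interrupt state change to the gfx ring or to the
 * matching MEC/pipe, based on the requested interrupt type.
 */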
5019 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5020                                             struct amdgpu_irq_src *src,
5021                                             unsigned type,
5022                                             enum amdgpu_interrupt_state state)
5023 {
5024         switch (type) {
5025         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5026                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5027                 break;
5028         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5029                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5030                 break;
5031         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5032                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5033                 break;
5034         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5035                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5036                 break;
5037         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5038                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5039                 break;
5040         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5041                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5042                 break;
5043         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5044                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5045                 break;
5046         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5047                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5048                 break;
5049         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5050                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5051                 break;
5052         default:
5053                 break;
5054         }
5055         return 0;
5056 }
5057
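/*
 * CP EOP interrupt handler: decode me/pipe/queue from the IV ring_id
 * and process fences on the matching gfx or compute ring.
 */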
5058 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5059                             struct amdgpu_irq_src *source,
5060                             struct amdgpu_iv_entry *entry)
5061 {
5062         int i;
5063         u8 me_id, pipe_id, queue_id;
5064         struct amdgpu_ring *ring;
5065
5066         DRM_DEBUG("IH: CP EOP\n");
5067         me_id = (entry->ring_id & 0x0c) >> 2;
5068         pipe_id = (entry->ring_id & 0x03) >> 0;
5069         queue_id = (entry->ring_id & 0x70) >> 4;
5070
5071         switch (me_id) {
5072         case 0:
5073                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5074                 break;
5075         case 1:
5076         case 2:
5077                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5078                         ring = &adev->gfx.compute_ring[i];
5079                         /* Per-queue interrupt is supported for MEC starting from VI.
5080                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
5081                          */
5082                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5083                                 amdgpu_fence_process(ring);
5084                 }
5085                 break;
5086         }
5087         return 0;
5088 }
5089
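/*
 * Common fault handling for priv_reg/priv_inst interrupts: signal a
 * scheduler fault on the ring identified by the IV entry's ring_id.
 */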
5090 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5091                            struct amdgpu_iv_entry *entry)
5092 {
5093         u8 me_id, pipe_id, queue_id;
5094         struct amdgpu_ring *ring;
5095         int i;
5096
5097         me_id = (entry->ring_id & 0x0c) >> 2;
5098         pipe_id = (entry->ring_id & 0x03) >> 0;
5099         queue_id = (entry->ring_id & 0x70) >> 4;
5100
5101         switch (me_id) {
5102         case 0:
5103                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5104                 break;
5105         case 1:
5106         case 2:
5107                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5108                         ring = &adev->gfx.compute_ring[i];
5109                         if (ring->me == me_id && ring->pipe == pipe_id &&
5110                             ring->queue == queue_id)
5111                                 drm_sched_fault(&ring->sched);
5112                 }
5113                 break;
5114         }
5115 }
5116
5117 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5118                                  struct amdgpu_irq_src *source,
5119                                  struct amdgpu_iv_entry *entry)
5120 {
5121         DRM_ERROR("Illegal register access in command stream\n");
5122         gfx_v9_0_fault(adev, entry);
5123         return 0;
5124 }
5125
5126 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5127                                   struct amdgpu_irq_src *source,
5128                                   struct amdgpu_iv_entry *entry)
5129 {
5130         DRM_ERROR("Illegal instruction in command stream\n");
5131         gfx_v9_0_fault(adev, entry);
5132         return 0;
5133 }
5134
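/*
 * RAS error callback: flag the SRAM ECC error to KFD and request a GPU
 * reset; the error is reported as uncorrectable (AMDGPU_RAS_UE).
 */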
5135 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5136                 struct amdgpu_iv_entry *entry)
5137 {
5138         /* TODO: a UE (uncorrectable error) will trigger an interrupt. */
5139         kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5140         amdgpu_ras_reset_gpu(adev, 0);
5141         return AMDGPU_RAS_UE;
5142 }
5143
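/*
 * CP ECC error interrupt handler: forward the IV entry to the RAS
 * framework if a gfx RAS block has been registered.
 */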
5144 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
5145                                   struct amdgpu_irq_src *source,
5146                                   struct amdgpu_iv_entry *entry)
5147 {
5148         struct ras_common_if *ras_if = adev->gfx.ras_if;
5149         struct ras_dispatch_if ih_data = {
5150                 .entry = entry,
5151         };
5152
5153         if (!ras_if)
5154                 return 0;
5155
5156         ih_data.head = *ras_if;
5157
5158         DRM_ERROR("CP ECC ERROR IRQ\n");
5159         amdgpu_ras_interrupt_dispatch(adev, &ih_data);
5160         return 0;
5161 }
5162
5163 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
5164         .name = "gfx_v9_0",
5165         .early_init = gfx_v9_0_early_init,
5166         .late_init = gfx_v9_0_late_init,
5167         .sw_init = gfx_v9_0_sw_init,
5168         .sw_fini = gfx_v9_0_sw_fini,
5169         .hw_init = gfx_v9_0_hw_init,
5170         .hw_fini = gfx_v9_0_hw_fini,
5171         .suspend = gfx_v9_0_suspend,
5172         .resume = gfx_v9_0_resume,
5173         .is_idle = gfx_v9_0_is_idle,
5174         .wait_for_idle = gfx_v9_0_wait_for_idle,
5175         .soft_reset = gfx_v9_0_soft_reset,
5176         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
5177         .set_powergating_state = gfx_v9_0_set_powergating_state,
5178         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
5179 };
5180
5181 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
5182         .type = AMDGPU_RING_TYPE_GFX,
5183         .align_mask = 0xff,
5184         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5185         .support_64bit_ptrs = true,
5186         .vmhub = AMDGPU_GFXHUB,
5187         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
5188         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
5189         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
5190         .emit_frame_size = /* 242 maximum in total if 16 IBs */
5191                 5 +  /* COND_EXEC */
5192                 7 +  /* PIPELINE_SYNC */
5193                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5194                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5195                 2 + /* VM_FLUSH */
5196                 8 +  /* FENCE for VM_FLUSH */
5197                 20 + /* GDS switch */
5198                 4 + /* double SWITCH_BUFFER,
5199                        the first COND_EXEC jumps to the place just
5200                        prior to this double SWITCH_BUFFER */
5201                 5 + /* COND_EXEC */
5202                 7 + /* HDP_flush */
5203                 4 + /* VGT_flush */
5204                 14 + /* CE_META */
5205                 31 + /* DE_META */
5206                 3 + /* CNTX_CTRL */
5207                 5 + /* HDP_INVL */
5208                 8 + 8 + /* FENCE x2 */
5209                 2, /* SWITCH_BUFFER */
5210         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
5211         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
5212         .emit_fence = gfx_v9_0_ring_emit_fence,
5213         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5214         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5215         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5216         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5217         .test_ring = gfx_v9_0_ring_test_ring,
5218         .test_ib = gfx_v9_0_ring_test_ib,
5219         .insert_nop = amdgpu_ring_insert_nop,
5220         .pad_ib = amdgpu_ring_generic_pad_ib,
5221         .emit_switch_buffer = gfx_v9_ring_emit_sb,
5222         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
5223         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
5224         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
5225         .emit_tmz = gfx_v9_0_ring_emit_tmz,
5226         .emit_wreg = gfx_v9_0_ring_emit_wreg,
5227         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5228         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5229         .soft_recovery = gfx_v9_0_ring_soft_recovery,
5230 };
5231
5232 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
5233         .type = AMDGPU_RING_TYPE_COMPUTE,
5234         .align_mask = 0xff,
5235         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5236         .support_64bit_ptrs = true,
5237         .vmhub = AMDGPU_GFXHUB,
5238         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
5239         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
5240         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
5241         .emit_frame_size =
5242                 20 + /* gfx_v9_0_ring_emit_gds_switch */
5243                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
5244                 5 + /* hdp invalidate */
5245                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5246                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5247                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5248                 2 + /* gfx_v9_0_ring_emit_vm_flush */
5249                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
5250         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
5251         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
5252         .emit_fence = gfx_v9_0_ring_emit_fence,
5253         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5254         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5255         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5256         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5257         .test_ring = gfx_v9_0_ring_test_ring,
5258         .test_ib = gfx_v9_0_ring_test_ib,
5259         .insert_nop = amdgpu_ring_insert_nop,
5260         .pad_ib = amdgpu_ring_generic_pad_ib,
5261         .set_priority = gfx_v9_0_ring_set_priority_compute,
5262         .emit_wreg = gfx_v9_0_ring_emit_wreg,
5263         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5264         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5265 };
5266
5267 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
5268         .type = AMDGPU_RING_TYPE_KIQ,
5269         .align_mask = 0xff,
5270         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5271         .support_64bit_ptrs = true,
5272         .vmhub = AMDGPU_GFXHUB,
5273         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
5274         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
5275         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
5276         .emit_frame_size =
5277                 20 + /* gfx_v9_0_ring_emit_gds_switch */
5278                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
5279                 5 + /* hdp invalidate */
5280                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5281                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5282                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5283                 2 + /* gfx_v9_0_ring_emit_vm_flush */
5284                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
5285         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
5286         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
5287         .test_ring = gfx_v9_0_ring_test_ring,
5288         .insert_nop = amdgpu_ring_insert_nop,
5289         .pad_ib = amdgpu_ring_generic_pad_ib,
5290         .emit_rreg = gfx_v9_0_ring_emit_rreg,
5291         .emit_wreg = gfx_v9_0_ring_emit_wreg,
5292         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5293         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5294 };
5295
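/* Hook up the ring function tables for the KIQ, gfx and compute rings. */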
5296 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
5297 {
5298         int i;
5299
5300         adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
5301
5302         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5303                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
5304
5305         for (i = 0; i < adev->gfx.num_compute_rings; i++)
5306                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
5307 }
5308
5309 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
5310         .set = gfx_v9_0_set_eop_interrupt_state,
5311         .process = gfx_v9_0_eop_irq,
5312 };
5313
5314 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
5315         .set = gfx_v9_0_set_priv_reg_fault_state,
5316         .process = gfx_v9_0_priv_reg_irq,
5317 };
5318
5319 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
5320         .set = gfx_v9_0_set_priv_inst_fault_state,
5321         .process = gfx_v9_0_priv_inst_irq,
5322 };
5323
5324 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
5325         .set = gfx_v9_0_set_cp_ecc_error_state,
5326         .process = gfx_v9_0_cp_ecc_error_irq,
5327 };
5328
5329
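/* Register the interrupt source callbacks handled by this IP block. */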
5330 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
5331 {
5332         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5333         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
5334
5335         adev->gfx.priv_reg_irq.num_types = 1;
5336         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
5337
5338         adev->gfx.priv_inst_irq.num_types = 1;
5339         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
5340
5341         adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
5342         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
5343 }
5344
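/* Select the RLC function table for the supported ASICs. */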
5345 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
5346 {
5347         switch (adev->asic_type) {
5348         case CHIP_VEGA10:
5349         case CHIP_VEGA12:
5350         case CHIP_VEGA20:
5351         case CHIP_RAVEN:
5352                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
5353                 break;
5354         default:
5355                 break;
5356         }
5357 }
5358
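/*
 * Set up the per-ASIC GDS configuration: GDS size, the maximum compute
 * wave id allowed to use GDS, and the GWS/OA sizes.
 */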
5359 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
5360 {
5361         /* init asic gds info */
5362         switch (adev->asic_type) {
5363         case CHIP_VEGA10:
5364         case CHIP_VEGA12:
5365         case CHIP_VEGA20:
5366                 adev->gds.gds_size = 0x10000;
5367                 break;
5368         case CHIP_RAVEN:
5369                 adev->gds.gds_size = 0x1000;
5370                 break;
5371         default:
5372                 adev->gds.gds_size = 0x10000;
5373                 break;
5374         }
5375
5376         switch (adev->asic_type) {
5377         case CHIP_VEGA10:
5378         case CHIP_VEGA20:
5379                 adev->gds.gds_compute_max_wave_id = 0x7ff;
5380                 break;
5381         case CHIP_VEGA12:
5382                 adev->gds.gds_compute_max_wave_id = 0x27f;
5383                 break;
5384         case CHIP_RAVEN:
5385                 if (adev->rev_id >= 0x8)
5386                         adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
5387                 else
5388                         adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
5389                 break;
5390         default:
5391                 /* this really depends on the chip */
5392                 adev->gds.gds_compute_max_wave_id = 0x7ff;
5393                 break;
5394         }
5395
5396         adev->gds.gws_size = 64;
5397         adev->gds.oa_size = 16;
5398 }
5399
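/*
 * Program the user-requested inactive CU bitmap for the SE/SH currently
 * selected via the GRBM index (see gfx_v9_0_select_se_sh()).
 */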
5400 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
5401                                                  u32 bitmap)
5402 {
5403         u32 data;
5404
5405         if (!bitmap)
5406                 return;
5407
5408         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5409         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5410
5411         WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
5412 }
5413
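/*
 * Return the active CU bitmap for the currently selected SE/SH by
 * combining the hardware and user inactive CU masks.
 */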
5414 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
5415 {
5416         u32 data, mask;
5417
5418         data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
5419         data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
5420
5421         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5422         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5423
5424         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
5425
5426         return (~data) & mask;
5427 }
5428
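/*
 * Walk every SE/SH, apply the user CU disable masks, and fill @cu_info
 * with the per-SH CU bitmaps, the always-on CU mask and the total
 * number of active CUs.
 */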
5429 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
5430                                  struct amdgpu_cu_info *cu_info)
5431 {
5432         int i, j, k, counter, active_cu_number = 0;
5433         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5434         unsigned disable_masks[4 * 2];
5435
5436         if (!adev || !cu_info)
5437                 return -EINVAL;
5438
5439         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
5440
5441         mutex_lock(&adev->grbm_idx_mutex);
5442         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5443                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5444                         mask = 1;
5445                         ao_bitmap = 0;
5446                         counter = 0;
5447                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
5448                         if (i < 4 && j < 2)
5449                                 gfx_v9_0_set_user_cu_inactive_bitmap(
5450                                         adev, disable_masks[i * 2 + j]);
5451                         bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
5452                         cu_info->bitmap[i][j] = bitmap;
5453
5454                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
5455                                 if (bitmap & mask) {
5456                                         if (counter < adev->gfx.config.max_cu_per_sh)
5457                                                 ao_bitmap |= mask;
5458                                         counter++;
5459                                 }
5460                                 mask <<= 1;
5461                         }
5462                         active_cu_number += counter;
5463                         if (i < 2 && j < 2)
5464                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5465                         cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
5466                 }
5467         }
5468         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5469         mutex_unlock(&adev->grbm_idx_mutex);
5470
5471         cu_info->number = active_cu_number;
5472         cu_info->ao_cu_mask = ao_cu_mask;
5473         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
5474
5475         return 0;
5476 }
5477
5478 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
5479 {
5480         .type = AMD_IP_BLOCK_TYPE_GFX,
5481         .major = 9,
5482         .minor = 0,
5483         .rev = 0,
5484         .funcs = &gfx_v9_0_ip_funcs,
5485 };