1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
5 #include <linux/kernel.h>
6 #include <linux/types.h>
7 #include <linux/cpumask.h>
8 #include <linux/qcom_scm.h>
9 #include <linux/pm_opp.h>
10 #include <linux/nvmem-consumer.h>
11 #include <linux/slab.h>
16 extern bool hang_debug;
17 static void a5xx_dump(struct msm_gpu *gpu);
/*
 * a5xx_flush() - publish newly written ringbuffer contents to the CP.
 *
 * Under the ring lock, commits the software shadow pointer (ring->next)
 * to ring->cur and computes the (possibly wrapped) write pointer.  The
 * hardware WPTR register is only touched when @ring is the ring the CP is
 * currently executing AND no preemption is in flight -- otherwise the
 * preemption code is responsible for restoring WPTR on ring switch.
 *
 * NOTE(review): this excerpt elides lines (locals `wptr`/`flags`, the
 * memory barrier implied by "everything is posted", closing braces).
 */
21 static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
23 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
24 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
/* Ring lock serializes against concurrent submitters updating ->next */
28 spin_lock_irqsave(&ring->lock, flags);
30 /* Copy the shadow to the actual register */
31 ring->cur = ring->next;
33 /* Make sure to wrap wptr if we need to */
34 wptr = get_wptr(ring);
36 spin_unlock_irqrestore(&ring->lock, flags);
38 /* Make sure everything is posted before making a decision */
41 /* Update HW if this is the current ring and we are not in preempt */
42 if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
43 gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
/*
 * a5xx_submit_in_rb() - "sudo" submit path: copy user command streams
 * directly into the kernel ringbuffer instead of chaining them as
 * indirect buffers.  Only reachable when CONFIG_DRM_MSM_GPU_SUDO is
 * enabled (see a5xx_submit()).  Synchronously waits for idle at the end
 * because no CACHE_FLUSH_TS event is emitted on this path, then retires
 * the fence by writing submit->seqno to the ring's memptrs.
 */
46 static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
47 struct msm_file_private *ctx)
49 struct msm_drm_private *priv = gpu->dev->dev_private;
50 struct msm_ringbuffer *ring = submit->ring;
51 struct msm_gem_object *obj;
52 uint32_t *ptr, dwords;
55 for (i = 0; i < submit->nr_cmds; i++) {
56 switch (submit->cmd[i].type) {
57 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
/* Skip CTX_RESTORE_BUF when the same context submitted last */
59 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
60 if (priv->lastctx == ctx)
63 case MSM_SUBMIT_CMD_BUF:
64 /* copy commands into RB: */
65 obj = submit->bos[submit->cmd[i].idx].obj;
66 dwords = submit->cmd[i].size;
68 ptr = msm_gem_get_vaddr(&obj->base);
70 /* _get_vaddr() shouldn't fail at this point,
71 * since we've already mapped it once in
/*
 * NOTE(review): the inner copy loop below reuses `i`, the outer
 * nr_cmds index -- this clobbers the outer iteration and only works
 * if at most one copyable cmd is present.  Confirm against callers.
 */
77 for (i = 0; i < dwords; i++) {
78 /* normally the OUT_PKTn() would wait
79 * for space for the packet. But since
80 * we just OUT_RING() the whole thing,
81 * need to call adreno_wait_ring()
84 adreno_wait_ring(ring, 1);
85 OUT_RING(ring, ptr[i]);
88 msm_gem_put_vaddr(&obj->base);
94 a5xx_flush(gpu, ring);
95 a5xx_preempt_trigger(gpu);
97 /* we might not necessarily have a cmd from userspace to
98 * trigger an event to know that submit has completed, so
101 a5xx_idle(gpu, ring);
102 ring->memptrs->fence = submit->seqno;
/*
 * a5xx_submit() - build and flush the ringbuffer commands for a submit.
 *
 * Sequence: optionally divert to the in-RB "sudo" path; program the
 * per-ring preemption save record (protected mode toggled off/on around
 * the privileged register writes); emit each user IB via
 * CP_INDIRECT_BUFFER_PFE; write the fence seqno to a scratch register
 * and to ring memory via a CACHE_FLUSH_TS event (which raises the
 * retire interrupt); finally emit CP_CONTEXT_SWITCH_YIELD so a pending
 * preemption can take the floor, then flush and maybe trigger preempt.
 */
106 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
107 struct msm_file_private *ctx)
109 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
110 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
111 struct msm_drm_private *priv = gpu->dev->dev_private;
112 struct msm_ringbuffer *ring = submit->ring;
113 unsigned int i, ibs = 0;
115 if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
116 priv->lastctx = NULL;
117 a5xx_submit_in_rb(gpu, submit, ctx);
121 OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
122 OUT_RING(ring, 0x02);
124 /* Turn off protected mode to write to special registers */
125 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
128 /* Set the save preemption record for the ring/command */
129 OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
130 OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
131 OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
133 /* Turn back on protected mode */
134 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
/*
 * NOTE(review): comment says "local" but the packet is
 * CP_PREEMPT_ENABLE_GLOBAL -- upstream later changed this to
 * CP_PREEMPT_ENABLE_LOCAL; confirm before relying on fine-grain preempt.
 */
137 /* Enable local preemption for finegrain preemption */
138 OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
139 OUT_RING(ring, 0x02);
141 /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
142 OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
143 OUT_RING(ring, 0x02);
145 /* Submit the commands */
146 for (i = 0; i < submit->nr_cmds; i++) {
147 switch (submit->cmd[i].type) {
148 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
150 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
151 if (priv->lastctx == ctx)
154 case MSM_SUBMIT_CMD_BUF:
155 OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
156 OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
157 OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
158 OUT_RING(ring, submit->cmd[i].size);
165 * Write the render mode to NULL (0) to indicate to the CP that the IBs
166 * are done rendering - otherwise a lucky preemption would start
167 * replaying from the last checkpoint
169 OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
176 /* Turn off IB level preemptions */
177 OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
178 OUT_RING(ring, 0x01);
180 /* Write the fence to the scratch register */
181 OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
182 OUT_RING(ring, submit->seqno);
185 * Execute a CACHE_FLUSH_TS event. This will ensure that the
186 * timestamp is written to the memory and then triggers the interrupt
188 OUT_PKT7(ring, CP_EVENT_WRITE, 4);
/* Bit 31 requests an IRQ when the timestamp lands */
189 OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
190 OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
191 OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
192 OUT_RING(ring, submit->seqno);
194 /* Yield the floor on command completion */
195 OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
197 * If dword[2:1] are non zero, they specify an address for the CP to
198 * write the value of dword[3] to on preemption complete. Write 0 to
201 OUT_RING(ring, 0x00);
202 OUT_RING(ring, 0x00);
203 /* Data value - not used if the address above is 0 */
204 OUT_RING(ring, 0x01);
205 /* Set bit 0 to trigger an interrupt on preempt complete */
206 OUT_RING(ring, 0x01);
208 a5xx_flush(gpu, ring);
210 /* Check to see if we need to start preemption */
211 a5xx_preempt_trigger(gpu);
/*
 * Hardware clock-gating (HWCG) register/value table, applied (or cleared)
 * by a5xx_set_hwcg().  Each entry pairs an RBBM clock-control register
 * with the vendor-recommended gating value; a5xx_set_hwcg() writes the
 * value when enabling and 0 when disabling.
 *
 * NOTE(review): the struct member declarations (offset/value) and the
 * array name line are elided in this excerpt; usage in a5xx_set_hwcg()
 * shows the array is named a5xx_hwcg with .offset/.value members.
 */
214 static const struct {
/* Shader processor (SP) clusters 0-3 */
218 {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
219 {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
220 {REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
221 {REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
222 {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
223 {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
224 {REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
225 {REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
226 {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
227 {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
228 {REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
229 {REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
230 {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
231 {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
232 {REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
233 {REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
/* Texture pipes (TP) 0-3 */
234 {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
235 {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
236 {REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
237 {REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
238 {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
239 {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
240 {REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
241 {REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
242 {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
243 {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
244 {REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
245 {REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
246 {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
247 {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
248 {REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
249 {REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
250 {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
251 {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
252 {REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
253 {REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
254 {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
255 {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
256 {REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
257 {REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
258 {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
259 {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
260 {REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
261 {REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
262 {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
263 {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
264 {REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
265 {REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
266 {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
267 {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
268 {REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
269 {REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
/* Unified cache (UCHE) */
270 {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
271 {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
272 {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
273 {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
274 {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
275 {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
/* Render backends (RB) 0-3 and color cache units (CCU) */
276 {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
277 {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
278 {REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
279 {REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
280 {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
281 {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
282 {REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
283 {REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
284 {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
285 {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
286 {REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
287 {REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
288 {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
289 {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
290 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
291 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
292 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
293 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
294 {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
295 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
296 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
297 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
298 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
299 {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
/* Remaining blocks: TSE/RAS/RBBM, GPC, VFD, HLSQ */
300 {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
301 {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
302 {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
303 {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
304 {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
305 {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
306 {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
307 {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
308 {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
309 {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
/*
 * a5xx_set_hwcg() - enable or disable hardware clock gating.
 * @gpu:   the GPU
 * @state: true to program the a5xx_hwcg table values, false to zero them
 *
 * Walks the whole a5xx_hwcg table, adds two extra GPMU delay/hyst writes
 * on a540, then sets the top-level RBBM_CLOCK_CNTL and nudges
 * RBBM_ISDB_CNT accordingly (0x182 when gating enabled, 0x180 otherwise).
 */
312 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
314 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
317 for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
318 gpu_write(gpu, a5xx_hwcg[i].offset,
319 state ? a5xx_hwcg[i].value : 0);
/* a540 has additional GPMU clock controls not in the common table */
321 if (adreno_is_a540(adreno_gpu)) {
322 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
323 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
326 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
327 gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
/*
 * a5xx_me_init() - send the CP_ME_INIT packet that configures the CP
 * micro engine (8 dwords), then flush ring 0 and wait for idle.
 *
 * Return: 0 on success, -EINVAL if the GPU fails to go idle afterwards.
 */
330 static int a5xx_me_init(struct msm_gpu *gpu)
332 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
333 struct msm_ringbuffer *ring = gpu->rb[0];
335 OUT_PKT7(ring, CP_ME_INIT, 8);
337 OUT_RING(ring, 0x0000002F);
339 /* Enable multiple hardware contexts */
340 OUT_RING(ring, 0x00000003);
342 /* Enable error detection */
343 OUT_RING(ring, 0x20000000);
345 /* Don't enable header dump */
346 OUT_RING(ring, 0x00000000);
347 OUT_RING(ring, 0x00000000);
349 /* Specify workarounds for various microcode issues */
350 if (adreno_is_a530(adreno_gpu)) {
351 /* Workaround for token end syncs
352 * Force a WFI after every direct-render 3D mode draw and every
355 OUT_RING(ring, 0x0000000B);
357 /* No workarounds enabled */
358 OUT_RING(ring, 0x00000000);
361 OUT_RING(ring, 0x00000000);
362 OUT_RING(ring, 0x00000000);
364 gpu->funcs->flush(gpu, ring);
365 return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
/*
 * a5xx_preempt_start() - prime the preemption machinery after hw init.
 *
 * No-op (early return) when only one ring is configured.  Otherwise
 * programs ring 0's preemption save record (with protected mode toggled
 * off/on around the privileged writes), disables global preemption,
 * enables local preemption and yield, then emits an initial
 * CP_CONTEXT_SWITCH_YIELD so the CP relinquishes the floor.
 *
 * Return: 0 on success, -EINVAL if the GPU fails to idle afterwards.
 */
368 static int a5xx_preempt_start(struct msm_gpu *gpu)
370 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
371 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
372 struct msm_ringbuffer *ring = gpu->rb[0];
374 if (gpu->nr_rings == 1)
377 /* Turn off protected mode to write to special registers */
378 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
381 /* Set the save preemption record for the ring/command */
382 OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
383 OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
384 OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
386 /* Turn back on protected mode */
387 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
390 OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
391 OUT_RING(ring, 0x00);
393 OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
394 OUT_RING(ring, 0x01);
396 OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
397 OUT_RING(ring, 0x01);
399 /* Yield the floor on command completion */
400 OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
401 OUT_RING(ring, 0x00);
402 OUT_RING(ring, 0x00);
403 OUT_RING(ring, 0x01);
404 OUT_RING(ring, 0x01);
406 gpu->funcs->flush(gpu, ring);
408 return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
/*
 * a5xx_ucode_init() - lazily allocate GEM buffers for the PM4 and PFP
 * microcode images and point the CP instruction-base registers at them.
 *
 * The BOs are created once and reused across subsequent hw_init calls
 * (the !bo checks).  On allocation failure the pointer is reset to NULL
 * so a later retry is possible, and the error is propagated.
 */
411 static int a5xx_ucode_init(struct msm_gpu *gpu)
413 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
414 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
417 if (!a5xx_gpu->pm4_bo) {
418 a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
419 adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
422 if (IS_ERR(a5xx_gpu->pm4_bo)) {
423 ret = PTR_ERR(a5xx_gpu->pm4_bo);
424 a5xx_gpu->pm4_bo = NULL;
425 DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
430 msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
433 if (!a5xx_gpu->pfp_bo) {
434 a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
435 adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
437 if (IS_ERR(a5xx_gpu->pfp_bo)) {
438 ret = PTR_ERR(a5xx_gpu->pfp_bo);
439 a5xx_gpu->pfp_bo = NULL;
440 DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
445 msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
/* Point the CP at the freshly (or previously) loaded microcode */
448 gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
449 REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
451 gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
452 REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
/* SCM "remote state" id used to ask the secure world to re-init the
 * already-loaded zap shader (see a5xx_zap_shader_resume below).
 */
457 #define SCM_GPU_ZAP_SHADER_RESUME 0
/*
 * a5xx_zap_shader_resume() - kick the secure processor to reinitialize a
 * zap shader that is already resident, via the SCM interface.  Logs and
 * returns the qcom_scm error on failure.
 */
459 static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
463 ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
465 DRM_ERROR("%s: zap-shader resume failed: %d\n",
/*
 * a5xx_zap_shader_init() - load (first call) or resume (later calls) the
 * zap shader in the secure world so the CP can leave secure mode.
 */
471 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
477 * If the zap shader is already loaded into memory we just need to kick
478 * the remote processor to reinitialize it
481 return a5xx_zap_shader_resume(gpu);
483 ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
/*
 * Interrupt sources the driver actually services (see a5xx_irq): bus and
 * timeout errors, CP hardware errors, hang detect, CP software interrupt
 * (preemption), cache-flush timestamp (retire), UCHE out-of-bounds and
 * GPMU voltage droop.  Written to RBBM_INT_0_MASK during hw_init.
 */
489 #define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
490 A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
491 A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
492 A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
493 A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
494 A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
495 A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
496 A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
497 A5XX_RBBM_INT_0_MASK_CP_SW | \
498 A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
499 A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
500 A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
/*
 * a5xx_hw_init() - full hardware bring-up sequence for the A5xx.
 *
 * Order matters throughout: VBIF/QoS setup, perf counters, fault-detect
 * masks, UCHE/GMEM ranges, CP thresholds, clock gating, CP register
 * protection, SECVID (trusted memory) teardown, 64-bit addressing, then
 * the software stages: adreno_hw_init(), preempt/GPMU/ucode init,
 * interrupt mask, ME start, power init, and finally zap-shader load (or
 * a manual switch out of secure mode if the zap shader is unavailable).
 *
 * NOTE(review): many lines (values, braces, returns) are elided in this
 * excerpt; comments below annotate only what is visible.
 */
502 static int a5xx_hw_init(struct msm_gpu *gpu)
504 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
507 gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
509 if (adreno_is_a540(adreno_gpu))
510 gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
512 /* Make all blocks contribute to the GPU BUSY perf counter */
513 gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
515 /* Enable RBBM error reporting bits */
516 gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
518 if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
520 * Mask out the activity signals from RB1-3 to avoid false
524 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
526 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
528 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
530 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
532 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
534 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
536 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
538 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
542 /* Enable fault detection */
543 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
546 /* Turn on performance counters */
547 gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
549 /* Select CP0 to always count cycles */
550 gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
552 /* Select RBBM0 to countable 6 to get the busy status for devfreq */
553 gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
555 /* Increase VFD cache access so LRZ and other data gets evicted less */
556 gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
558 /* Disable L2 bypass in the UCHE */
559 gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
560 gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
561 gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
562 gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
564 /* Set the GMEM VA range (0 to gpu->gmem) */
565 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
566 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
567 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
568 0x00100000 + adreno_gpu->gmem - 1);
569 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
/* CP queue thresholds; MERCIU size differs between a530 and a540 */
571 gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
572 if (adreno_is_a530(adreno_gpu))
573 gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
574 if (adreno_is_a540(adreno_gpu))
575 gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
576 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
577 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
/*
 * NOTE(review): PC_DBG_ECO_CNTL is written twice below; the original
 * likely guarded these with per-chip conditionals that are elided here.
 */
579 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22));
581 if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
582 gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
584 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);
586 /* Enable USE_RETENTION_FLOPS */
587 gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
589 /* Enable ME/PFP split notification */
590 gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
593 a5xx_set_hwcg(gpu, true);
595 gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
597 /* Set the highest bank bit */
598 gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
599 gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
600 if (adreno_is_a540(adreno_gpu))
601 gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2);
603 /* Protect registers from the CP */
604 gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
/* RBBM, CP and known-sensitive register ranges */
607 gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
608 gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
609 gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
610 gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
611 gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
612 gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
614 /* Content protect */
615 gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
616 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
618 gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
619 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
622 gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
623 gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
624 gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
625 gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
628 gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
629 gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
632 gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
633 gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
636 gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
638 if (adreno_is_a530(adreno_gpu))
639 gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
640 ADRENO_PROTECT_RW(0x10000, 0x8000));
642 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
644 * Disable the trusted memory range - we don't actually supported secure
645 * memory rendering at this point in time and we don't want to block off
646 * part of the virtual memory space.
648 gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
649 REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
650 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
652 /* Put the GPU into 64 bit by default */
653 gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
654 gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
655 gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
656 gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
657 gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
658 gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
659 gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
660 gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
661 gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
662 gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
663 gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
664 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
667 * VPC corner case with local memory load kill leads to corrupt
668 * internal state. Normal Disable does not work for all a5x chips.
669 * So do the following setting to disable it.
671 if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
672 gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
673 gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
/* Common adreno init (ringbuffer setup etc.), then a5xx specifics */
676 ret = adreno_hw_init(gpu);
680 a5xx_preempt_hw_init(gpu);
682 a5xx_gpmu_ucode_init(gpu);
684 ret = a5xx_ucode_init(gpu);
/*
 * NOTE(review): comment/value look mismatched -- A5XX_INT_MASK enables
 * the serviced interrupt set; possibly a different mask line is elided.
 */
688 /* Disable the interrupts through the initial bringup stage */
689 gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
691 /* Clear ME_HALT to start the micro engine */
692 gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
693 ret = a5xx_me_init(gpu);
697 ret = a5xx_power_init(gpu);
702 * Send a pipeline event stat to get misbehaving counters to start
705 if (adreno_is_a530(adreno_gpu)) {
706 OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
707 OUT_RING(gpu->rb[0], 0x0F);
709 gpu->funcs->flush(gpu, gpu->rb[0]);
710 if (!a5xx_idle(gpu, gpu->rb[0]))
715 * Try to load a zap shader into the secure world. If successful
716 * we can use the CP to switch out of secure mode. If not then we
717 * have no resource but to try to switch ourselves out manually. If we
718 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
719 * be blocked and a permissions violation will soon follow.
721 ret = a5xx_zap_shader_init(gpu);
723 OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
724 OUT_RING(gpu->rb[0], 0x00000000);
726 gpu->funcs->flush(gpu, gpu->rb[0]);
727 if (!a5xx_idle(gpu, gpu->rb[0]))
730 /* Print a warning so if we die, we know why */
731 dev_warn_once(gpu->dev->dev,
732 "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
733 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
736 /* Last step - yield the ringbuffer */
737 a5xx_preempt_start(gpu);
/*
 * a5xx_recover() - GPU hang recovery: dump diagnostic state (including
 * the 8 CP scratch registers), pulse RBBM_SW_RESET_CMD to hard-reset the
 * core, then (in elided code) re-run the common recover path.  The dummy
 * read between the two writes posts the reset write before deasserting.
 */
742 static void a5xx_recover(struct msm_gpu *gpu)
746 adreno_dump_info(gpu);
748 for (i = 0; i < 8; i++) {
749 printk("CP_SCRATCH_REG%d: %u\n", i,
750 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
756 gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
757 gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
758 gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
/*
 * a5xx_destroy() - release all a5xx-private resources: preemption state,
 * the PM4/PFP/GPMU firmware BOs (unpin the iova, then drop the GEM
 * reference), then the common adreno cleanup.  Elided code presumably
 * frees the a5xx_gpu struct itself -- confirm against the full file.
 */
762 static void a5xx_destroy(struct msm_gpu *gpu)
764 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
765 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
767 DBG("%s", gpu->name);
769 a5xx_preempt_fini(gpu);
771 if (a5xx_gpu->pm4_bo) {
772 msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
773 drm_gem_object_put_unlocked(a5xx_gpu->pm4_bo);
776 if (a5xx_gpu->pfp_bo) {
777 msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
778 drm_gem_object_put_unlocked(a5xx_gpu->pfp_bo);
781 if (a5xx_gpu->gpmu_bo) {
782 msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
783 drm_gem_object_put_unlocked(a5xx_gpu->gpmu_bo);
786 adreno_gpu_cleanup(adreno_gpu);
/*
 * _a5xx_check_idle() - poll helper: the GPU is idle when RBBM_STATUS
 * shows no busy bits (other than HI_BUSY) and no hang-detect interrupt
 * is pending.
 */
790 static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
792 if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
796 * Nearly every abnormality ends up pausing the GPU and triggering a
797 * fault so we can safely just watch for this one interrupt to fire
799 return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
800 A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
/*
 * a5xx_idle() - wait for @ring (which must be the current ring) to drain
 * and for the GPU to report idle.  Returns false (with diagnostics) on
 * timeout, non-current ring, or CP drain failure; true on clean idle
 * (returns elided in this excerpt).
 */
803 bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
805 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
806 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
808 if (ring != a5xx_gpu->cur_ring) {
809 WARN(1, "Tried to idle a non-current ringbuffer\n");
813 /* wait for CP to drain ringbuffer: */
814 if (!adreno_idle(gpu, ring))
817 if (spin_until(_a5xx_check_idle(gpu))) {
818 DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
819 gpu->name, __builtin_return_address(0),
820 gpu_read(gpu, REG_A5XX_RBBM_STATUS),
821 gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
822 gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
823 gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
/*
 * a5xx_fault_handler() - IOMMU fault callback: rate-limited log of the
 * faulting iova plus CP scratch registers 4-7 (which userspace/microcode
 * use as breadcrumbs to locate the faulting submission).
 */
830 static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
832 struct msm_gpu *gpu = arg;
833 pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
835 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
836 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
837 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
838 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
/*
 * a5xx_cp_err_irq() - decode and log CP (command processor) error
 * interrupts: opcode errors (recovering the offending opcode from the
 * indexed PFP stat FIFO), HW faults, DMA errors, protected-register
 * violations and AHB bus errors.  All reporting is rate-limited.
 */
843 static void a5xx_cp_err_irq(struct msm_gpu *gpu)
845 u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
847 if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
850 gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
853 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
/* first read advances the index, second read yields the value */
857 gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
858 val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
860 dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
864 if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
865 dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
866 gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
868 if (status & A5XX_CP_INT_CP_DMA_ERROR)
869 dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
871 if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
872 u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
/* bit 24 = write access; bits 19:2 = dword register offset */
874 dev_err_ratelimited(gpu->dev->dev,
875 "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
876 val & (1 << 24) ? "WRITE" : "READ",
877 (val & 0xFFFFF) >> 2, val);
880 if (status & A5XX_CP_INT_CP_AHB_ERROR) {
881 u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
882 const char *access[16] = { "reserved", "reserved",
883 "timestamp lo", "timestamp hi", "pfp read", "pfp write",
884 "", "", "me read", "me write", "", "", "crashdump read",
887 dev_err_ratelimited(gpu->dev->dev,
888 "CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
889 status & 0xFFFFF, access[(status >> 24) & 0xF],
890 (status & (1 << 31)), status);
/*
 * a5xx_rbbm_err_irq() - decode and log RBBM error interrupts.
 * @status: the RBBM_INT_0 status already read (and mostly cleared) by
 *          a5xx_irq(); AHB errors are cleared HERE, source first, to
 *          avoid an interrupt storm (see comment in a5xx_irq()).
 */
894 static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
896 if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
897 u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
899 dev_err_ratelimited(gpu->dev->dev,
900 "RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
901 val & (1 << 28) ? "WRITE" : "READ",
902 (val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
905 /* Clear the error */
906 gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
908 /* Clear the interrupt */
909 gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
910 A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
913 if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
914 dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
916 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
917 dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
918 gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
920 if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
921 dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
922 gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS))
;
924 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
925 dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
926 gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
928 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
929 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
931 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
932 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
/*
 * a5xx_uche_err_irq() - report a UCHE out-of-bounds access with the
 * trapped address assembled from the TRAP_LOG_HI/LO registers.
 *
 * NOTE(review): the HI word is OR-ed in without a << 32 shift as shown
 * here -- either a shift line is elided from this excerpt or this is a
 * real bug; confirm against the full source.
 */
935 static void a5xx_uche_err_irq(struct msm_gpu *gpu)
937 uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
939 addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
941 dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
/* a5xx_gpmu_err_irq() - log a GPMU voltage-droop interrupt (rate-limited). */
945 static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
947 dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
/*
 * a5xx_fault_detect_irq() - hang-detect handler: dump the active ring's
 * id/fence and key CP state (RPTR/WPTR, IB1/IB2 base+size), stop the
 * hangcheck timer so it doesn't fire during recovery, and queue the
 * recover work on the driver workqueue.
 */
950 static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
952 struct drm_device *dev = gpu->dev;
953 struct msm_drm_private *priv = dev->dev_private;
954 struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
956 DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
957 ring ? ring->id : -1, ring ? ring->seqno : 0,
958 gpu_read(gpu, REG_A5XX_RBBM_STATUS),
959 gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
960 gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
961 gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
962 gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
963 gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
964 gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
966 /* Turn off the hangcheck timer to keep it from bothering us */
967 del_timer(&gpu->hangcheck_timer);
969 queue_work(priv->wq, &gpu->recover_work);
/* RBBM error interrupt bits routed to a5xx_rbbm_err_irq() by a5xx_irq() */
972 #define RBBM_ERROR_MASK \
973 (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
974 A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
975 A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
976 A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
977 A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
978 A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
/*
 * a5xx_irq() - top-level GPU interrupt handler.  Reads RBBM_INT_0_STATUS,
 * acks everything except AHB_ERROR up front (AHB errors must have their
 * source cleared first or the line storms -- done in a5xx_rbbm_err_irq),
 * then dispatches each pending source to its specific handler.
 * CACHE_FLUSH_TS doubles as the fence-retire notification.
 */
980 static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
982 u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
985 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
986 * before the source is cleared the interrupt will storm.
988 gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
989 status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
991 /* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
992 if (status & RBBM_ERROR_MASK)
993 a5xx_rbbm_err_irq(gpu, status);
995 if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
996 a5xx_cp_err_irq(gpu);
998 if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
999 a5xx_fault_detect_irq(gpu);
1001 if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1002 a5xx_uche_err_irq(gpu);
1004 if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1005 a5xx_gpmu_err_irq(gpu);
1007 if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1008 a5xx_preempt_trigger(gpu);
1009 msm_gpu_retire(gpu);
1012 if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1013 a5xx_preempt_irq(gpu);
/*
 * Map of generic adreno register roles to their a5xx register offsets,
 * consumed by the shared adreno code via REG_ADRENO_* lookups.
 */
1018 static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
1019 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
1020 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
1021 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
1022 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
1023 REG_A5XX_CP_RB_RPTR_ADDR_HI),
1024 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
1025 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
1026 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
/*
 * Register ranges captured for debugfs/crash dumps.  Entries are
 * inclusive (start, end) pairs of dword register offsets; the list is
 * terminated in elided lines (conventionally with ~0).
 */
1029 static const u32 a5xx_registers[] = {
1030 0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1031 0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1032 0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1033 0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1034 0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1035 0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1036 0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1037 0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1038 0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1039 0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1040 0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1041 0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1042 0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1043 0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1044 0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1045 0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1046 0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1047 0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1048 0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1049 0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1050 0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1051 0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1052 0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1053 0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1054 0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1055 0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
1056 0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
/* Minimal debug dump helper: log the current RBBM status register. */
1060 static void a5xx_dump(struct msm_gpu *gpu)
1062 DRM_DEV_INFO(gpu->dev->dev, "status: %08x\n",
1063 gpu_read(gpu, REG_A5XX_RBBM_STATUS));
/*
 * a5xx_pm_resume() - power-up sequence for the A5XX.
 *
 * Brings up the core power via msm_gpu_pm_resume() first, then sequences
 * the RBCCU and SP power domains, polling bit 20 of the respective
 * PWR_CLK_STATUS register until the GDSC reports enabled.
 */
1067 static int a5xx_pm_resume(struct msm_gpu *gpu)
1071 /* Turn on the core power */
1072 ret = msm_gpu_pm_resume(gpu);
1076 /* Turn the RBCCU domain first to limit the chances of voltage droop */
/* NOTE(review): 0x778000 is an undocumented GPMU power-control magic value */
1077 gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1079 /* Wait 3 usecs before polling */
1082 ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1083 (1 << 20), (1 << 20));
1085 DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1087 gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1091 /* Turn on the SP domain */
1092 gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1093 ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1094 (1 << 20), (1 << 20));
1096 DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
/*
 * a5xx_pm_suspend() - power-down sequence: halt and drain the VBIF, reset
 * it, then hand off to the generic msm_gpu_pm_suspend().
 */
1102 static int a5xx_pm_suspend(struct msm_gpu *gpu)
1104 /* Clear the VBIF pipe before shutting down */
1105 gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF)
1106 spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF);
1108 gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1111 * Reset the VBIF before power collapse to avoid issue with FIFO
1114 gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1115 gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1117 return msm_gpu_pm_suspend(gpu);
/* Read the 64-bit CP_0 performance counter as the GPU timestamp. */
1120 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1122 *value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
1123 REG_A5XX_RBBM_PERFCTR_CP_0_HI);
/*
 * State for the CP crashdumper: a GEM buffer that holds the dump script
 * and the captured register data (see a5xx_crashdumper_init()/_run()).
 */
1128 struct a5xx_crashdumper {
1130 struct drm_gem_object *bo;
/*
 * A5XX-specific captured GPU state: embeds the generic msm_gpu_state so
 * container_of() can recover it (see a5xx_gpu_state_destroy()/a5xx_show()),
 * plus the HLSQ-aperture register snapshot (hlsqregs).
 */
1134 struct a5xx_gpu_state {
1135 struct msm_gpu_state base;
/*
 * Allocate and map the 1MB crashdumper buffer (script at offset 0,
 * captured data at 256k - see a5xx_gpu_state_get_hlsq_regs()).
 * Returns 0 on success or the PTR_ERR() of the failed allocation.
 */
1139 static int a5xx_crashdumper_init(struct msm_gpu *gpu,
1140 struct a5xx_crashdumper *dumper)
1142 dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
1143 SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
1144 &dumper->bo, &dumper->iova);
1146 if (!IS_ERR(dumper->ptr))
1147 msm_gem_object_set_name(dumper->bo, "crashdump");
1149 return PTR_ERR_OR_ZERO(dumper->ptr);
/*
 * Point the CP at the crashdump script, kick it off via CRASH_DUMP_CNTL,
 * and poll bit 2 of that register for completion.
 */
1152 static int a5xx_crashdumper_run(struct msm_gpu *gpu,
1153 struct a5xx_crashdumper *dumper)
1157 if (IS_ERR_OR_NULL(dumper->ptr))
1160 gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
1161 REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
1163 gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
1165 return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
1166 val & 0x04, 100, 10000);
1170 * These are a list of the registers that need to be read through the HLSQ
1171 * aperture through the crashdumper. These are not nominally accessible from
1172 * the CPU on a secure platform.
/* Each entry is { type, regoffset, count } - see the uses below. */
1174 static const struct {
1178 } a5xx_hlsq_aperture_regs[] = {
1179 { 0x35, 0xe00, 0x32 }, /* HLSQ non-context */
1180 { 0x31, 0x2080, 0x1 }, /* HLSQ 2D context 0 */
1181 { 0x33, 0x2480, 0x1 }, /* HLSQ 2D context 1 */
1182 { 0x32, 0xe780, 0x62 }, /* HLSQ 3D context 0 */
1183 { 0x34, 0xef80, 0x62 }, /* HLSQ 3D context 1 */
1184 { 0x3f, 0x0ec0, 0x40 }, /* SP non-context */
1185 { 0x3d, 0x2040, 0x1 }, /* SP 2D context 0 */
1186 { 0x3b, 0x2440, 0x1 }, /* SP 2D context 1 */
1187 { 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
1188 { 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
1189 { 0x3a, 0x0f00, 0x1c }, /* TP non-context */
1190 { 0x38, 0x2000, 0xa }, /* TP 2D context 0 */
1191 { 0x36, 0x2400, 0xa }, /* TP 2D context 1 */
1192 { 0x39, 0xe700, 0x80 }, /* TP 3D context 0 */
1193 { 0x37, 0xef00, 0x80 }, /* TP 3D context 1 */
/*
 * a5xx_gpu_state_get_hlsq_regs() - use the CP crashdumper to read the
 * HLSQ-aperture registers that the CPU cannot access directly and stash
 * them in a5xx_state->hlsqregs. Bails quietly (leaving hlsqregs unset) if
 * the dumper can't be allocated, the snapshot buffer can't be allocated,
 * or the crashdumper fails to run.
 */
1196 static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
1197 struct a5xx_gpu_state *a5xx_state)
1199 struct a5xx_crashdumper dumper = { 0 };
1200 u32 offset, count = 0;
1204 if (a5xx_crashdumper_init(gpu, &dumper))
1207 /* The script will be written at offset 0 */
1210 /* Start writing the data at offset 256k */
1211 offset = dumper.iova + (256 * SZ_1K);
1213 /* Count how many additional registers to get from the HLSQ aperture */
1214 for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
1215 count += a5xx_hlsq_aperture_regs[i].count;
1217 a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1218 if (!a5xx_state->hlsqregs)
1221 /* Build the crashdump script */
1222 for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1223 u32 type = a5xx_hlsq_aperture_regs[i].type;
1224 u32 c = a5xx_hlsq_aperture_regs[i].count;
1226 /* Write the register to select the desired bank */
1227 *ptr++ = ((u64) type << 8);
1228 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
1232 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
1235 offset += c * sizeof(u32);
1238 /* Write two zeros to close off the script */
/*
 * NOTE(review): on failure, hlsqregs should also be reset to NULL after
 * kfree() so a5xx_show() cannot read freed memory - confirm the reset
 * happens (it is not visible in this chunk).
 */
1242 if (a5xx_crashdumper_run(gpu, &dumper)) {
1243 kfree(a5xx_state->hlsqregs);
1244 msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1248 /* Copy the data from the crashdumper to the state */
1249 memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
1250 count * sizeof(u32));
1252 msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
/*
 * a5xx_gpu_state_get() - capture a snapshot of GPU state for
 * debugfs/coredump. Returns ERR_PTR(-ENOMEM) if the state struct cannot
 * be allocated.
 */
1255 static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
1257 struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
1261 return ERR_PTR(-ENOMEM);
1263 /* Temporarily disable hardware clock gating before reading the hw */
1264 a5xx_set_hwcg(gpu, false);
1266 /* First get the generic state from the adreno core */
1267 adreno_gpu_state_get(gpu, &(a5xx_state->base));
1269 a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
1271 /* Get the HLSQ regs with the help of the crashdumper */
1272 a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
/* Re-enable hardware clock gating now that the snapshot is complete */
1274 a5xx_set_hwcg(gpu, true);
1276 return &a5xx_state->base;
/*
 * kref release callback for a captured state: free the HLSQ snapshot,
 * then let the adreno core free the rest.
 */
1279 static void a5xx_gpu_state_destroy(struct kref *kref)
1281 struct msm_gpu_state *state = container_of(kref,
1282 struct msm_gpu_state, ref);
1283 struct a5xx_gpu_state *a5xx_state = container_of(state,
1284 struct a5xx_gpu_state, base);
1286 kfree(a5xx_state->hlsqregs);
1288 adreno_gpu_state_destroy(state);
/*
 * Drop a reference on a captured GPU state; per the kref API the return
 * is nonzero when this put released the state.
 */
1292 int a5xx_gpu_state_put(struct msm_gpu_state *state)
1294 if (IS_ERR_OR_NULL(state))
1297 return kref_put(&state->ref, a5xx_gpu_state_destroy);
1301 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
/*
 * a5xx_show() - print a captured GPU state: the generic adreno state
 * first, then the extra HLSQ-aperture registers (if the crashdumper
 * captured them), skipping slots that read back 0xdeadbeef.
 */
1302 void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1303 struct drm_printer *p)
1307 struct a5xx_gpu_state *a5xx_state = container_of(state,
1308 struct a5xx_gpu_state, base);
1310 if (IS_ERR_OR_NULL(state))
1313 adreno_show(gpu, state, p);
1315 /* Dump the additional a5xx HLSQ registers */
1316 if (!a5xx_state->hlsqregs)
1319 drm_printf(p, "registers-hlsq:\n");
1321 for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1322 u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
1323 u32 c = a5xx_hlsq_aperture_regs[i].count;
1325 for (j = 0; j < c; j++, pos++, o++) {
1327 * To keep the crashdump simple we pull the entire range
1328 * for each register type but not all of the registers
1329 * in the range are valid. Fortunately invalid registers
1330 * stick out like a sore thumb with a value of
1333 if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
1336 drm_printf(p, " - { offset: 0x%04x, value: 0x%08x }\n",
1337 o << 2, a5xx_state->hlsqregs[pos]);
/* Return the ringbuffer the GPU is currently executing from (cur_ring). */
1343 static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1345 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1346 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1348 return a5xx_gpu->cur_ring;
/*
 * a5xx_gpu_busy() - devfreq busy-time accounting: delta of the RBBM_0
 * busy-cycle counter since the previous call, converted to time by
 * dividing by the core clock rate in MHz.
 */
1351 static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
1353 u64 busy_cycles, busy_time;
1355 busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
1356 REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
1358 busy_time = busy_cycles - gpu->devfreq.busy_cycles;
1359 do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);
/* Remember the raw counter so the next call computes a fresh delta */
1361 gpu->devfreq.busy_cycles = busy_cycles;
/* Guard the narrowing cast: the u64 must fit in unsigned long */
1363 if (WARN_ON(busy_time > ~0LU))
1366 return (unsigned long)busy_time;
/* A5XX hook table for the common adreno/msm GPU layer. */
1369 static const struct adreno_gpu_funcs funcs = {
1371 .get_param = adreno_get_param,
1372 .hw_init = a5xx_hw_init,
1373 .pm_suspend = a5xx_pm_suspend,
1374 .pm_resume = a5xx_pm_resume,
1375 .recover = a5xx_recover,
1376 .submit = a5xx_submit,
1377 .flush = a5xx_flush,
1378 .active_ring = a5xx_active_ring,
1380 .destroy = a5xx_destroy,
1381 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1384 #if defined(CONFIG_DEBUG_FS)
1385 .debugfs_init = a5xx_debugfs_init,
1387 .gpu_busy = a5xx_gpu_busy,
1388 .gpu_state_get = a5xx_gpu_state_get,
1389 .gpu_state_put = a5xx_gpu_state_put,
1391 .get_timestamp = a5xx_get_timestamp,
/*
 * check_speed_bin() - read the fused speed bin from the "speed_bin" nvmem
 * cell (if one is defined) and restrict the OPP table accordingly via
 * dev_pm_opp_set_supported_hw().
 */
1394 static void check_speed_bin(struct device *dev)
1396 struct nvmem_cell *cell;
1399 cell = nvmem_cell_get(dev, "speed_bin");
1401 /* If a nvmem cell isn't defined, nothing to do */
/*
 * NOTE(review): nvmem_cell_read() can return ERR_PTR and it allocates a
 * buffer the caller must kfree() - neither is handled here. Confirm and
 * fix: check the returned pointer with IS_ERR() before dereferencing and
 * free it after copying out the bin value.
 */
1405 bin = *((u32 *) nvmem_cell_read(cell, NULL));
1406 nvmem_cell_put(cell);
1410 dev_pm_opp_set_supported_hw(dev, &val, 1);
1413 struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1415 struct msm_drm_private *priv = dev->dev_private;
1416 struct platform_device *pdev = priv->gpu_pdev;
1417 struct a5xx_gpu *a5xx_gpu = NULL;
1418 struct adreno_gpu *adreno_gpu;
1419 struct msm_gpu *gpu;
1423 DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
1424 return ERR_PTR(-ENXIO);
1427 a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1429 return ERR_PTR(-ENOMEM);
1431 adreno_gpu = &a5xx_gpu->base;
1432 gpu = &adreno_gpu->base;
1434 adreno_gpu->registers = a5xx_registers;
1435 adreno_gpu->reg_offsets = a5xx_register_offsets;
1437 a5xx_gpu->lm_leakage = 0x4E001A;
1439 check_speed_bin(&pdev->dev);
1441 ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
1443 a5xx_destroy(&(a5xx_gpu->base.base));
1444 return ERR_PTR(ret);
1448 msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1450 /* Set up the preemption specific bits and pieces for each ringbuffer */
1451 a5xx_preempt_init(gpu);