1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
5 #include <linux/kernel.h>
6 #include <linux/types.h>
7 #include <linux/cpumask.h>
8 #include <linux/qcom_scm.h>
9 #include <linux/pm_opp.h>
10 #include <linux/nvmem-consumer.h>
11 #include <linux/slab.h>
16 extern bool hang_debug;
17 static void a5xx_dump(struct msm_gpu *gpu);
/*
 * a5xx_flush() - publish newly written ringbuffer commands to the CP.
 *
 * Under ring->lock, advance the software write pointer (ring->cur) to
 * ring->next and compute the wrapped hardware wptr, then poke the CP's
 * RB_WPTR register - but only when this ring is the active ring and no
 * preemption is in flight (the preempt code restores WPTR itself when it
 * switches rings).
 *
 * NOTE(review): this excerpt elides some original lines (declarations of
 * 'flags'/'wptr', a memory barrier near the "everything is posted"
 * comment) - confirm against the full file.
 */
21 static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
23 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
24 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
28 spin_lock_irqsave(&ring->lock, flags);
30 /* Copy the shadow to the actual register */
31 ring->cur = ring->next;
33 /* Make sure to wrap wptr if we need to */
34 wptr = get_wptr(ring);
36 spin_unlock_irqrestore(&ring->lock, flags);
38 /* Make sure everything is posted before making a decision */
41 /* Update HW if this is the current ring and we are not in preempt */
42 if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
43 gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
/*
 * a5xx_submit_in_rb() - copy a submit's command streams directly into the
 * ringbuffer instead of issuing indirect buffers (CONFIG_DRM_MSM_GPU_SUDO
 * debug path, see a5xx_submit()).
 *
 * IB target buffers are skipped, context-restore buffers are skipped when
 * the submitting context was already current, and everything else is
 * copied dword-by-dword into the RB.  Since no CACHE_FLUSH_TS event is
 * emitted on this path, the GPU is idled and the fence value is written
 * from the CPU at the end.
 *
 * NOTE(review): the inner copy loop appears to reuse the outer loop index
 * 'i' ("for (i = 0; i < dwords; i++)"), which would clobber the command
 * iteration - confirm against the full (un-elided) file whether a separate
 * index is used there.
 */
46 static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
47 struct msm_file_private *ctx)
49 struct msm_drm_private *priv = gpu->dev->dev_private;
50 struct msm_ringbuffer *ring = submit->ring;
51 struct msm_gem_object *obj;
52 uint32_t *ptr, dwords;
55 for (i = 0; i < submit->nr_cmds; i++) {
56 switch (submit->cmd[i].type) {
57 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
59 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
60 if (priv->lastctx == ctx)
62 case MSM_SUBMIT_CMD_BUF:
63 /* copy commands into RB: */
64 obj = submit->bos[submit->cmd[i].idx].obj;
65 dwords = submit->cmd[i].size;
67 ptr = msm_gem_get_vaddr(&obj->base);
69 /* _get_vaddr() shouldn't fail at this point,
70 * since we've already mapped it once in
76 for (i = 0; i < dwords; i++) {
77 /* normally the OUT_PKTn() would wait
78 * for space for the packet. But since
79 * we just OUT_RING() the whole thing,
80 * need to call adreno_wait_ring()
83 adreno_wait_ring(ring, 1);
84 OUT_RING(ring, ptr[i]);
87 msm_gem_put_vaddr(&obj->base);
93 a5xx_flush(gpu, ring);
94 a5xx_preempt_trigger(gpu);
96 /* we might not necessarily have a cmd from userspace to
97 * trigger an event to know that submit has completed, so
100 a5xx_idle(gpu, ring);
101 ring->memptrs->fence = submit->seqno;
/*
 * a5xx_submit() - queue a userspace submit on its ringbuffer.
 *
 * Normal submission path.  Sequence written to the RB:
 *   1. Disable global preemption, then (in protected-mode-off window)
 *      program the preemption save record for this ring; re-enable
 *      preemption and allow CP_CONTEXT_SWITCH_YIELD in IB2s.
 *   2. Emit one CP_INDIRECT_BUFFER_PFE per command buffer (skipping IB
 *      targets and redundant context-restore buffers).
 *   3. Reset render mode, turn off IB-level preemption, write the fence
 *      seqno to a scratch register, and emit a CACHE_FLUSH_TS event that
 *      writes the fence to memory and raises an interrupt.
 *   4. Yield the floor with CP_CONTEXT_SWITCH_YIELD (bit 0 set so preempt
 *      completion interrupts).
 * Finally the ring is flushed and a preemption check is triggered.
 *
 * The sudo/in_rb path short-circuits to a5xx_submit_in_rb() above.
 */
105 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
106 struct msm_file_private *ctx)
108 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
109 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
110 struct msm_drm_private *priv = gpu->dev->dev_private;
111 struct msm_ringbuffer *ring = submit->ring;
112 unsigned int i, ibs = 0;
114 if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
115 priv->lastctx = NULL;
116 a5xx_submit_in_rb(gpu, submit, ctx);
120 OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
121 OUT_RING(ring, 0x02);
123 /* Turn off protected mode to write to special registers */
124 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
127 /* Set the save preemption record for the ring/command */
128 OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
129 OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
130 OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
132 /* Turn back on protected mode */
133 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
136 /* Enable local preemption for finegrain preemption */
137 OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
138 OUT_RING(ring, 0x02);
140 /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
141 OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
142 OUT_RING(ring, 0x02);
144 /* Submit the commands */
145 for (i = 0; i < submit->nr_cmds; i++) {
146 switch (submit->cmd[i].type) {
147 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
149 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
150 if (priv->lastctx == ctx)
152 case MSM_SUBMIT_CMD_BUF:
153 OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
154 OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
155 OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
156 OUT_RING(ring, submit->cmd[i].size);
163 * Write the render mode to NULL (0) to indicate to the CP that the IBs
164 * are done rendering - otherwise a lucky preemption would start
165 * replaying from the last checkpoint
167 OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
174 /* Turn off IB level preemptions */
175 OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
176 OUT_RING(ring, 0x01);
178 /* Write the fence to the scratch register */
179 OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
180 OUT_RING(ring, submit->seqno);
183 * Execute a CACHE_FLUSH_TS event. This will ensure that the
184 * timestamp is written to the memory and then triggers the interrupt
186 OUT_PKT7(ring, CP_EVENT_WRITE, 4);
187 OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
188 OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
189 OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
190 OUT_RING(ring, submit->seqno);
192 /* Yield the floor on command completion */
193 OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
195 * If dword[2:1] are non zero, they specify an address for the CP to
196 * write the value of dword[3] to on preemption complete. Write 0 to
199 OUT_RING(ring, 0x00);
200 OUT_RING(ring, 0x00);
201 /* Data value - not used if the address above is 0 */
202 OUT_RING(ring, 0x01);
203 /* Set bit 0 to trigger an interrupt on preempt complete */
204 OUT_RING(ring, 0x01);
206 a5xx_flush(gpu, ring);
208 /* Check to see if we need to start preemption */
209 a5xx_preempt_trigger(gpu);
/*
 * Hardware clock-gating configuration table, applied (or zeroed) by
 * a5xx_set_hwcg() below.  Each entry is an (RBBM clock-control register,
 * value) pair; the values are per-block gating/hysteresis/delay settings
 * for the SP, TP, UCHE, RB, CCU, RAC, TSE/RAS/RBBM, GPC, VFD and HLSQ
 * blocks.  The magic values come from the hardware programming guide and
 * should not be tweaked without vendor documentation.
 */
212 static const struct {
216 {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
217 {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
218 {REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
219 {REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
220 {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
221 {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
222 {REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
223 {REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
224 {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
225 {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
226 {REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
227 {REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
228 {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
229 {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
230 {REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
231 {REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
232 {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
233 {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
234 {REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
235 {REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
236 {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
237 {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
238 {REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
239 {REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
240 {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
241 {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
242 {REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
243 {REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
244 {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
245 {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
246 {REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
247 {REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
248 {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
249 {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
250 {REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
251 {REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
252 {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
253 {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
254 {REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
255 {REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
256 {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
257 {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
258 {REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
259 {REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
260 {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
261 {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
262 {REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
263 {REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
264 {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
265 {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
266 {REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
267 {REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
268 {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
269 {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
270 {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
271 {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
272 {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
273 {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
274 {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
275 {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
276 {REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
277 {REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
278 {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
279 {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
280 {REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
281 {REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
282 {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
283 {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
284 {REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
285 {REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
286 {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
287 {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
288 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
289 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
290 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
291 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
292 {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
293 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
294 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
295 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
296 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
297 {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
298 {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
299 {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
300 {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
301 {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
302 {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
303 {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
304 {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
305 {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
306 {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
307 {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
/*
 * a5xx_set_hwcg() - enable or disable hardware clock gating.
 * @gpu:   the GPU to program
 * @state: true to apply the a5xx_hwcg table values, false to write zeros
 *
 * Walks the a5xx_hwcg table above, adds two GPMU registers on A540, then
 * sets the master RBBM_CLOCK_CNTL and the ISDB counter (0x182 on, 0x180
 * off).  Exported (non-static) so the GPMU/debug code can toggle gating.
 */
310 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
312 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
315 for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
316 gpu_write(gpu, a5xx_hwcg[i].offset,
317 state ? a5xx_hwcg[i].value : 0);
319 if (adreno_is_a540(adreno_gpu)) {
320 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
321 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
324 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
325 gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
/*
 * a5xx_me_init() - send the CP_ME_INIT packet that initializes the
 * micro engine after the microcode has been loaded.
 *
 * Emits an 8-dword CP_ME_INIT on ring 0: enables multiple hardware
 * contexts and error detection, leaves header dump disabled, and selects
 * a microcode workaround mask (extra WFIs on A530, none otherwise).
 *
 * Return: 0 on success, -EINVAL if the GPU fails to idle afterwards.
 */
328 static int a5xx_me_init(struct msm_gpu *gpu)
330 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
331 struct msm_ringbuffer *ring = gpu->rb[0];
333 OUT_PKT7(ring, CP_ME_INIT, 8);
335 OUT_RING(ring, 0x0000002F);
337 /* Enable multiple hardware contexts */
338 OUT_RING(ring, 0x00000003);
340 /* Enable error detection */
341 OUT_RING(ring, 0x20000000);
343 /* Don't enable header dump */
344 OUT_RING(ring, 0x00000000);
345 OUT_RING(ring, 0x00000000);
347 /* Specify workarounds for various microcode issues */
348 if (adreno_is_a530(adreno_gpu)) {
349 /* Workaround for token end syncs
350 * Force a WFI after every direct-render 3D mode draw and every
353 OUT_RING(ring, 0x0000000B);
355 /* No workarounds enabled */
356 OUT_RING(ring, 0x00000000);
359 OUT_RING(ring, 0x00000000);
360 OUT_RING(ring, 0x00000000);
362 gpu->funcs->flush(gpu, ring);
363 return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
/*
 * a5xx_preempt_start() - prime the preemption machinery on ring 0.
 *
 * No-op when only one ring exists (preemption disabled).  Otherwise,
 * inside a protected-mode-off window, program the preemption save record
 * for ring 0, then disable global preemption, enable local preemption and
 * yields, and finally emit a CP_CONTEXT_SWITCH_YIELD so the CP yields the
 * floor immediately.
 *
 * Return: 0 on success, -EINVAL if the GPU does not idle afterwards.
 */
366 static int a5xx_preempt_start(struct msm_gpu *gpu)
368 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
369 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
370 struct msm_ringbuffer *ring = gpu->rb[0];
372 if (gpu->nr_rings == 1)
375 /* Turn off protected mode to write to special registers */
376 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
379 /* Set the save preemption record for the ring/command */
380 OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
381 OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
382 OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
384 /* Turn back on protected mode */
385 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
388 OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
389 OUT_RING(ring, 0x00);
391 OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
392 OUT_RING(ring, 0x01);
394 OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
395 OUT_RING(ring, 0x01);
397 /* Yield the floor on command completion */
398 OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
399 OUT_RING(ring, 0x00);
400 OUT_RING(ring, 0x00);
401 OUT_RING(ring, 0x01);
402 OUT_RING(ring, 0x01);
404 gpu->funcs->flush(gpu, ring);
406 return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
/*
 * a5xx_ucode_init() - allocate firmware BOs (once) and point the CP at them.
 *
 * Lazily creates the PM4 and PFP microcode buffer objects from the
 * firmware blobs loaded by the adreno core; on allocation failure the
 * cached pointer is reset to NULL so a later retry can re-create it.
 * Finally programs the ME/PFP instruction base registers with the BO
 * iovas.  Safe to call on every hw_init - existing BOs are reused.
 */
409 static int a5xx_ucode_init(struct msm_gpu *gpu)
411 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
412 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
415 if (!a5xx_gpu->pm4_bo) {
416 a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
417 adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
420 if (IS_ERR(a5xx_gpu->pm4_bo)) {
421 ret = PTR_ERR(a5xx_gpu->pm4_bo);
422 a5xx_gpu->pm4_bo = NULL;
423 DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
428 msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
431 if (!a5xx_gpu->pfp_bo) {
432 a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
433 adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
435 if (IS_ERR(a5xx_gpu->pfp_bo)) {
436 ret = PTR_ERR(a5xx_gpu->pfp_bo);
437 a5xx_gpu->pfp_bo = NULL;
438 DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
443 msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
446 gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
447 REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
449 gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
450 REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
455 #define SCM_GPU_ZAP_SHADER_RESUME 0
/*
 * a5xx_zap_shader_resume() - ask the secure world to re-run the
 * already-loaded zap shader (via the SCM "set remote state" call with
 * SCM_GPU_ZAP_SHADER_RESUME).  Logs and returns the SCM error on failure.
 */
457 static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
461 ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
463 DRM_ERROR("%s: zap-shader resume failed: %d\n",
/*
 * a5xx_zap_shader_init() - load (or resume) the zap shader in the secure
 * world so the CP can later switch the GPU out of secure mode.
 *
 * If the shader was already loaded in a previous cycle only a resume call
 * is needed; otherwise the full load goes through adreno_zap_shader_load().
 */
469 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
475 * If the zap shader is already loaded into memory we just need to kick
476 * the remote processor to reinitialize it
479 return a5xx_zap_shader_resume(gpu);
481 ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
/*
 * Set of RBBM interrupt sources the driver unmasks in a5xx_hw_init():
 * bus/timeout errors, CP errors, hang detect, software interrupt
 * (preemption), cache-flush timestamp (retire), UCHE OOB and GPMU
 * voltage droop.  Handled in a5xx_irq() below.
 */
487 #define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
488 A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
489 A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
490 A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
491 A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
492 A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
493 A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
494 A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
495 A5XX_RBBM_INT_0_MASK_CP_SW | \
496 A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
497 A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
498 A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
/*
 * a5xx_hw_init() - bring the GPU to a fully operational state.
 *
 * Runs on every power-up/recovery.  Broad sequence:
 *   - VBIF QoS / performance counters / error reporting setup;
 *   - fault-detect masking and hang-interrupt configuration;
 *   - UCHE trap base, GMEM VA range, CP queue thresholds;
 *   - chip-specific ECO/quirk registers, hardware clock gating;
 *   - CP protected-register ranges, secure-video (SECVID) disable;
 *   - switch all blocks to 64-bit addressing;
 *   - adreno_hw_init(), preempt/GPMU/ucode init, unmask interrupts,
 *     release ME halt, CP_ME_INIT, power init;
 *   - zap-shader load so the CP can leave secure mode (with a manual
 *     SECVID_TRUST_CNTL fallback), then start preemption.
 *
 * Return: 0 on success or a negative errno from any init stage.
 *
 * NOTE(review): several error-check and blank lines are elided in this
 * excerpt; register values are from the hardware programming guide and
 * must not be changed without vendor documentation.
 */
500 static int a5xx_hw_init(struct msm_gpu *gpu)
502 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
505 gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
507 if (adreno_is_a540(adreno_gpu))
508 gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
510 /* Make all blocks contribute to the GPU BUSY perf counter */
511 gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
513 /* Enable RBBM error reporting bits */
514 gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
516 if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
518 * Mask out the activity signals from RB1-3 to avoid false
522 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
524 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
526 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
528 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
530 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
532 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
534 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
536 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
540 /* Enable fault detection */
541 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
544 /* Turn on performance counters */
545 gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01)
547 /* Select CP0 to always count cycles */
548 gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
550 /* Select RBBM0 to countable 6 to get the busy status for devfreq */
551 gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
553 /* Increase VFD cache access so LRZ and other data gets evicted less */
554 gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
556 /* Disable L2 bypass in the UCHE */
557 gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
558 gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
559 gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
560 gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
562 /* Set the GMEM VA range (0 to gpu->gmem) */
563 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
564 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
565 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
566 0x00100000 + adreno_gpu->gmem - 1);
567 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
569 gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
570 if (adreno_is_a530(adreno_gpu))
571 gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
572 if (adreno_is_a540(adreno_gpu))
573 gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
574 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
575 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
577 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22));
579 if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
580 gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
582 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);
584 /* Enable USE_RETENTION_FLOPS */
585 gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
587 /* Enable ME/PFP split notification */
588 gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
591 a5xx_set_hwcg(gpu, true);
593 gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
595 /* Set the highest bank bit */
596 gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
597 gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
598 if (adreno_is_a540(adreno_gpu))
599 gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2);
601 /* Protect registers from the CP */
602 gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
605 gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
606 gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
607 gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
608 gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
609 gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
610 gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
612 /* Content protect */
613 gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
614 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
616 gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
617 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
620 gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
621 gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
622 gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
623 gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
626 gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
627 gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
630 gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
631 gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
634 gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
636 if (adreno_is_a530(adreno_gpu))
637 gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
638 ADRENO_PROTECT_RW(0x10000, 0x8000));
640 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
642 * Disable the trusted memory range - we don't actually supported secure
643 * memory rendering at this point in time and we don't want to block off
644 * part of the virtual memory space.
646 gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
647 REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
648 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
650 /* Put the GPU into 64 bit by default */
651 gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
652 gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
653 gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
654 gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
655 gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
656 gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
657 gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
658 gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
659 gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
660 gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
661 gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
662 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
665 * VPC corner case with local memory load kill leads to corrupt
666 * internal state. Normal Disable does not work for all a5x chips.
667 * So do the following setting to disable it.
669 if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
670 gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
671 gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
674 ret = adreno_hw_init(gpu);
678 a5xx_preempt_hw_init(gpu);
680 a5xx_gpmu_ucode_init(gpu);
682 ret = a5xx_ucode_init(gpu);
686 /* Disable the interrupts through the initial bringup stage */
687 gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
689 /* Clear ME_HALT to start the micro engine */
690 gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
691 ret = a5xx_me_init(gpu);
695 ret = a5xx_power_init(gpu);
700 * Send a pipeline event stat to get misbehaving counters to start
703 if (adreno_is_a530(adreno_gpu)) {
704 OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
705 OUT_RING(gpu->rb[0], 0x0F);
707 gpu->funcs->flush(gpu, gpu->rb[0]);
708 if (!a5xx_idle(gpu, gpu->rb[0]))
713 * Try to load a zap shader into the secure world. If successful
714 * we can use the CP to switch out of secure mode. If not then we
715 * have no resource but to try to switch ourselves out manually. If we
716 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
717 * be blocked and a permissions violation will soon follow.
719 ret = a5xx_zap_shader_init(gpu);
721 OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
722 OUT_RING(gpu->rb[0], 0x00000000);
724 gpu->funcs->flush(gpu, gpu->rb[0]);
725 if (!a5xx_idle(gpu, gpu->rb[0]))
728 /* Print a warning so if we die, we know why */
729 dev_warn_once(gpu->dev->dev,
730 "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
731 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
734 /* Last step - yield the ringbuffer */
735 a5xx_preempt_start(gpu);
/*
 * a5xx_recover() - recover the GPU after a hang.
 *
 * Dumps adreno state and the eight CP scratch registers for post-mortem
 * debugging, then performs a software reset: write 1 to SW_RESET_CMD,
 * read it back to make sure the write posted before de-asserting, then
 * write 0 to release reset.  (The common recover path continues in code
 * elided from this excerpt.)
 */
740 static void a5xx_recover(struct msm_gpu *gpu)
744 adreno_dump_info(gpu);
746 for (i = 0; i < 8; i++) {
747 printk("CP_SCRATCH_REG%d: %u\n", i,
748 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
754 gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
/* Read back posts the write before de-asserting reset */
755 gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
756 gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
/*
 * a5xx_destroy() - tear down A5xx-specific state.
 *
 * Shuts down preemption, releases the PM4/PFP microcode and GPMU buffer
 * objects (unpinning their iovas first), then delegates the rest of the
 * cleanup to the common adreno layer.
 */
760 static void a5xx_destroy(struct msm_gpu *gpu)
762 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
763 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
765 DBG("%s", gpu->name);
767 a5xx_preempt_fini(gpu);
769 if (a5xx_gpu->pm4_bo) {
770 msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
771 drm_gem_object_put_unlocked(a5xx_gpu->pm4_bo);
774 if (a5xx_gpu->pfp_bo) {
775 msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
776 drm_gem_object_put_unlocked(a5xx_gpu->pfp_bo);
779 if (a5xx_gpu->gpmu_bo) {
780 msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
781 drm_gem_object_put_unlocked(a5xx_gpu->gpmu_bo);
784 adreno_gpu_cleanup(adreno_gpu);
/*
 * _a5xx_check_idle() - one-shot idle probe used by the spin_until() loop
 * in a5xx_idle().  The GPU counts as idle when RBBM_STATUS shows no busy
 * bits other than the always-on HI_BUSY, and no hang-detect interrupt is
 * pending.
 */
788 static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
790 if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
794 * Nearly every abnormality ends up pausing the GPU and triggering a
795 * fault so we can safely just watch for this one interrupt to fire
797 return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
798 A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
/*
 * a5xx_idle() - wait for the GPU to drain @ring and go fully idle.
 *
 * Only valid on the current ring (warns and bails otherwise).  First
 * waits for the CP to consume the ringbuffer via adreno_idle(), then
 * spins on _a5xx_check_idle(); on timeout logs status, interrupt state
 * and rptr/wptr for diagnosis.
 *
 * Return: true if idle was reached, false on wrong ring or timeout
 * (return statements are elided in this excerpt).
 */
801 bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
803 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
804 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
806 if (ring != a5xx_gpu->cur_ring) {
807 WARN(1, "Tried to idle a non-current ringbuffer\n");
811 /* wait for CP to drain ringbuffer: */
812 if (!adreno_idle(gpu, ring))
815 if (spin_until(_a5xx_check_idle(gpu))) {
816 DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
817 gpu->name, __builtin_return_address(0),
818 gpu_read(gpu, REG_A5XX_RBBM_STATUS),
819 gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
820 gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
821 gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
/*
 * a5xx_fault_handler() - IOMMU fault callback.
 * @arg:   opaque pointer registered with the IOMMU, actually a struct msm_gpu
 * @iova:  faulting GPU virtual address
 * @flags: fault flags from the IOMMU layer
 *
 * Rate-limited log of the faulting address plus CP scratch registers 4-7
 * (which userspace/microcode use as breadcrumbs to locate the fault).
 */
828 static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
830 struct msm_gpu *gpu = arg;
831 pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
833 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
834 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
835 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
836 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
/*
 * a5xx_cp_err_irq() - decode and log CP (command processor) error
 * interrupts: opcode errors (recovering the offending opcode from the
 * indexed PFP_STAT_DATA register), HW faults, DMA errors, protected-mode
 * register violations and AHB bus errors.  All output is rate-limited;
 * this handler only reports - recovery happens via the hang machinery.
 */
841 static void a5xx_cp_err_irq(struct msm_gpu *gpu)
843 u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
845 if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
848 gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
851 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
855 gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
856 val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
858 dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
862 if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
863 dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
864 gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
866 if (status & A5XX_CP_INT_CP_DMA_ERROR)
867 dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
869 if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
870 u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
872 dev_err_ratelimited(gpu->dev->dev,
873 "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
874 val & (1 << 24) ? "WRITE" : "READ",
875 (val & 0xFFFFF) >> 2, val);
878 if (status & A5XX_CP_INT_CP_AHB_ERROR) {
879 u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
880 const char *access[16] = { "reserved", "reserved",
881 "timestamp lo", "timestamp hi", "pfp read", "pfp write",
882 "", "", "me read", "me write", "", "", "crashdump read",
885 dev_err_ratelimited(gpu->dev->dev,
886 "CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
887 status & 0xFFFFF, access[(status >> 24) & 0xF],
888 (status & (1 << 31)), status);
/*
 * a5xx_rbbm_err_irq() - decode and log RBBM error interrupts.
 * @status: RBBM_INT_0_STATUS value captured by a5xx_irq() (which has
 *          already cleared every bit except AHB_ERROR - see caller).
 *
 * For AHB bus errors the error source must be cleared via RBBM_AHB_CMD
 * before the interrupt itself is cleared, otherwise the interrupt storms.
 * The remaining conditions (transfer timeout, ME/PFP/ETS master-split
 * timeouts, ATB overflows) are report-only.
 */
892 static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
894 if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
895 u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
897 dev_err_ratelimited(gpu->dev->dev,
898 "RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
899 val & (1 << 28) ? "WRITE" : "READ",
900 (val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
903 /* Clear the error */
904 gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
906 /* Clear the interrupt */
907 gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
908 A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
911 if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
912 dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
914 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
915 dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
916 gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
918 if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
919 dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
920 gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
922 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
923 dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
924 gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
926 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
927 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
929 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
930 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
/*
 * a5xx_uche_err_irq() - log a UCHE out-of-bounds access, reassembling the
 * 64-bit faulting address from the TRAP_LOG_HI/LO register pair.
 */
933 static void a5xx_uche_err_irq(struct msm_gpu *gpu)
935 uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
937 addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
939 dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
/* a5xx_gpmu_err_irq() - log a GPMU voltage-droop interrupt (report only). */
943 static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
945 dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
/*
 * a5xx_fault_detect_irq() - handle the hang-detect interrupt.
 *
 * Logs a detailed snapshot (ring id/fence, RBBM status, RB and IB1/IB2
 * pointers/sizes), disables the hangcheck timer so it does not race with
 * us, and queues the common recover work which will reset the GPU.
 */
948 static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
950 struct drm_device *dev = gpu->dev;
951 struct msm_drm_private *priv = dev->dev_private;
952 struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
954 DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
955 ring ? ring->id : -1, ring ? ring->seqno : 0,
956 gpu_read(gpu, REG_A5XX_RBBM_STATUS),
957 gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
958 gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
959 gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
960 gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
961 gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
962 gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
964 /* Turn off the hangcheck timer to keep it from bothering us */
965 del_timer(&gpu->hangcheck_timer);
967 queue_work(priv->wq, &gpu->recover_work);
/*
 * RBBM error conditions routed to a5xx_rbbm_err_irq() from the top-level
 * interrupt handler.  Note ATB_BUS_OVERFLOW is handled there too but is
 * not part of this dispatch mask.
 */
970 #define RBBM_ERROR_MASK \
971 (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
972 A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
973 A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
974 A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
975 A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
976 A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
/*
 * a5xx_irq() - top-level GPU interrupt handler.
 *
 * Reads and acks RBBM_INT_0_STATUS (deliberately leaving AHB_ERROR set -
 * clearing it before the source is cleared would make it storm; the
 * rbbm_err path clears it properly), then dispatches each pending
 * condition: RBBM errors, CP errors, hang detect, UCHE OOB, GPMU droop,
 * cache-flush timestamp (retire + preempt check) and the CP software
 * interrupt used by the preemption code.
 */
978 static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
980 u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
983 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
984 * before the source is cleared the interrupt will storm.
986 gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
987 status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
989 /* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
990 if (status & RBBM_ERROR_MASK)
991 a5xx_rbbm_err_irq(gpu, status);
993 if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
994 a5xx_cp_err_irq(gpu);
996 if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
997 a5xx_fault_detect_irq(gpu);
999 if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1000 a5xx_uche_err_irq(gpu);
1002 if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1003 a5xx_gpmu_err_irq(gpu);
1005 if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1006 a5xx_preempt_trigger(gpu);
1007 msm_gpu_retire(gpu);
1010 if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1011 a5xx_preempt_irq(gpu);
/*
 * Map of generic adreno register identifiers to their A5xx register
 * offsets, used by the common adreno code (adreno_gpu_read/write helpers)
 * to access the CP ringbuffer registers generically.
 */
1016 static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
1017 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
1018 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
1019 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
1020 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
1021 REG_A5XX_CP_RB_RPTR_ADDR_HI),
1022 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
1023 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
1024 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
/*
 * Inclusive (start, end) register-offset pairs snapshotted for GPU state
 * dumps/crash capture.  Ranges are not strictly sorted; each pair is
 * walked start..end by the common dump code.
 */
1027 static const u32 a5xx_registers[] = {
1028 0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1029 0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1030 0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1031 0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1032 0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1033 0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1034 0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1035 0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1036 0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1037 0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1038 0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1039 0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1040 0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1041 0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1042 0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1043 0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1044 0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1045 0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1046 0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1047 0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1048 0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1049 0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1050 0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1051 0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1052 0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1053 0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
1054 0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
1058 static void a5xx_dump(struct msm_gpu *gpu)
1060 DRM_DEV_INFO(gpu->dev->dev, "status: %08x\n",
1061 gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1065 static int a5xx_pm_resume(struct msm_gpu *gpu)
1069 /* Turn on the core power */
1070 ret = msm_gpu_pm_resume(gpu);
1074 /* Turn the RBCCU domain first to limit the chances of voltage droop */
1075 gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1077 /* Wait 3 usecs before polling */
1080 ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1081 (1 << 20), (1 << 20));
1083 DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1085 gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1089 /* Turn on the SP domain */
1090 gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1091 ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1092 (1 << 20), (1 << 20));
1094 DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1100 static int a5xx_pm_suspend(struct msm_gpu *gpu)
1102 /* Clear the VBIF pipe before shutting down */
1103 gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF);
1104 spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF);
1106 gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1109 * Reset the VBIF before power collapse to avoid issue with FIFO
1112 gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1113 gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1115 return msm_gpu_pm_suspend(gpu);
1118 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1120 *value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
1121 REG_A5XX_RBBM_PERFCTR_CP_0_HI);
1126 struct a5xx_crashdumper {
1128 struct drm_gem_object *bo;
1132 struct a5xx_gpu_state {
1133 struct msm_gpu_state base;
1137 static int a5xx_crashdumper_init(struct msm_gpu *gpu,
1138 struct a5xx_crashdumper *dumper)
1140 dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
1141 SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
1142 &dumper->bo, &dumper->iova);
1144 if (!IS_ERR(dumper->ptr))
1145 msm_gem_object_set_name(dumper->bo, "crashdump");
1147 return PTR_ERR_OR_ZERO(dumper->ptr);
1150 static int a5xx_crashdumper_run(struct msm_gpu *gpu,
1151 struct a5xx_crashdumper *dumper)
1155 if (IS_ERR_OR_NULL(dumper->ptr))
1158 gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
1159 REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
1161 gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
1163 return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
1164 val & 0x04, 100, 10000);
1168 * These are a list of the registers that need to be read through the HLSQ
1169 * aperture through the crashdumper. These are not nominally accessible from
1170 * the CPU on a secure platform.
1172 static const struct {
1176 } a5xx_hlsq_aperture_regs[] = {
1177 { 0x35, 0xe00, 0x32 }, /* HSLQ non-context */
1178 { 0x31, 0x2080, 0x1 }, /* HLSQ 2D context 0 */
1179 { 0x33, 0x2480, 0x1 }, /* HLSQ 2D context 1 */
1180 { 0x32, 0xe780, 0x62 }, /* HLSQ 3D context 0 */
1181 { 0x34, 0xef80, 0x62 }, /* HLSQ 3D context 1 */
1182 { 0x3f, 0x0ec0, 0x40 }, /* SP non-context */
1183 { 0x3d, 0x2040, 0x1 }, /* SP 2D context 0 */
1184 { 0x3b, 0x2440, 0x1 }, /* SP 2D context 1 */
1185 { 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
1186 { 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
1187 { 0x3a, 0x0f00, 0x1c }, /* TP non-context */
1188 { 0x38, 0x2000, 0xa }, /* TP 2D context 0 */
1189 { 0x36, 0x2400, 0xa }, /* TP 2D context 1 */
1190 { 0x39, 0xe700, 0x80 }, /* TP 3D context 0 */
1191 { 0x37, 0xef00, 0x80 }, /* TP 3D context 1 */
1194 static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
1195 struct a5xx_gpu_state *a5xx_state)
1197 struct a5xx_crashdumper dumper = { 0 };
1198 u32 offset, count = 0;
1202 if (a5xx_crashdumper_init(gpu, &dumper))
1205 /* The script will be written at offset 0 */
1208 /* Start writing the data at offset 256k */
1209 offset = dumper.iova + (256 * SZ_1K);
1211 /* Count how many additional registers to get from the HLSQ aperture */
1212 for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
1213 count += a5xx_hlsq_aperture_regs[i].count;
1215 a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1216 if (!a5xx_state->hlsqregs)
1219 /* Build the crashdump script */
1220 for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1221 u32 type = a5xx_hlsq_aperture_regs[i].type;
1222 u32 c = a5xx_hlsq_aperture_regs[i].count;
1224 /* Write the register to select the desired bank */
1225 *ptr++ = ((u64) type << 8);
1226 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
1230 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
1233 offset += c * sizeof(u32);
1236 /* Write two zeros to close off the script */
1240 if (a5xx_crashdumper_run(gpu, &dumper)) {
1241 kfree(a5xx_state->hlsqregs);
1242 msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1246 /* Copy the data from the crashdumper to the state */
1247 memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
1248 count * sizeof(u32));
1250 msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1253 static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
1255 struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
1259 return ERR_PTR(-ENOMEM);
1261 /* Temporarily disable hardware clock gating before reading the hw */
1262 a5xx_set_hwcg(gpu, false);
1264 /* First get the generic state from the adreno core */
1265 adreno_gpu_state_get(gpu, &(a5xx_state->base));
1267 a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
1269 /* Get the HLSQ regs with the help of the crashdumper */
1270 a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
1272 a5xx_set_hwcg(gpu, true);
1274 return &a5xx_state->base;
1277 static void a5xx_gpu_state_destroy(struct kref *kref)
1279 struct msm_gpu_state *state = container_of(kref,
1280 struct msm_gpu_state, ref);
1281 struct a5xx_gpu_state *a5xx_state = container_of(state,
1282 struct a5xx_gpu_state, base);
1284 kfree(a5xx_state->hlsqregs);
1286 adreno_gpu_state_destroy(state);
1290 int a5xx_gpu_state_put(struct msm_gpu_state *state)
1292 if (IS_ERR_OR_NULL(state))
1295 return kref_put(&state->ref, a5xx_gpu_state_destroy);
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
/* Print a captured GPU state, including the extra HLSQ-aperture registers */
void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
		struct drm_printer *p)
{
	int i, j;
	u32 pos = 0;
	struct a5xx_gpu_state *a5xx_state = container_of(state,
		struct a5xx_gpu_state, base);

	if (IS_ERR_OR_NULL(state))
		return;

	adreno_show(gpu, state, p);

	/* Dump the additional a5xx HLSQ registers */
	if (!a5xx_state->hlsqregs)
		return;

	drm_printf(p, "registers-hlsq:\n");

	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
		u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
		u32 c = a5xx_hlsq_aperture_regs[i].count;

		for (j = 0; j < c; j++, pos++, o++) {
			/*
			 * To keep the crashdump simple we pull the entire range
			 * for each register type but not all of the registers
			 * in the range are valid. Fortunately invalid registers
			 * stick out like a sore thumb with a value of
			 * 0xdeadbeef
			 */
			if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
				continue;

			drm_printf(p, " - { offset: 0x%04x, value: 0x%08x }\n",
				o << 2, a5xx_state->hlsqregs[pos]);
		}
	}
}
#endif
1341 static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1343 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1344 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1346 return a5xx_gpu->cur_ring;
1349 static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
1351 u64 busy_cycles, busy_time;
1353 busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
1354 REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
1356 busy_time = busy_cycles - gpu->devfreq.busy_cycles;
1357 do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);
1359 gpu->devfreq.busy_cycles = busy_cycles;
1361 if (WARN_ON(busy_time > ~0LU))
1364 return (unsigned long)busy_time;
1367 static const struct adreno_gpu_funcs funcs = {
1369 .get_param = adreno_get_param,
1370 .hw_init = a5xx_hw_init,
1371 .pm_suspend = a5xx_pm_suspend,
1372 .pm_resume = a5xx_pm_resume,
1373 .recover = a5xx_recover,
1374 .submit = a5xx_submit,
1375 .flush = a5xx_flush,
1376 .active_ring = a5xx_active_ring,
1378 .destroy = a5xx_destroy,
1379 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1382 #if defined(CONFIG_DEBUG_FS)
1383 .debugfs_init = a5xx_debugfs_init,
1385 .gpu_busy = a5xx_gpu_busy,
1386 .gpu_state_get = a5xx_gpu_state_get,
1387 .gpu_state_put = a5xx_gpu_state_put,
1389 .get_timestamp = a5xx_get_timestamp,
1392 static void check_speed_bin(struct device *dev)
1394 struct nvmem_cell *cell;
1397 cell = nvmem_cell_get(dev, "speed_bin");
1399 /* If a nvmem cell isn't defined, nothing to do */
1403 bin = *((u32 *) nvmem_cell_read(cell, NULL));
1404 nvmem_cell_put(cell);
1408 dev_pm_opp_set_supported_hw(dev, &val, 1);
1411 struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1413 struct msm_drm_private *priv = dev->dev_private;
1414 struct platform_device *pdev = priv->gpu_pdev;
1415 struct a5xx_gpu *a5xx_gpu = NULL;
1416 struct adreno_gpu *adreno_gpu;
1417 struct msm_gpu *gpu;
1421 DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
1422 return ERR_PTR(-ENXIO);
1425 a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1427 return ERR_PTR(-ENOMEM);
1429 adreno_gpu = &a5xx_gpu->base;
1430 gpu = &adreno_gpu->base;
1432 adreno_gpu->registers = a5xx_registers;
1433 adreno_gpu->reg_offsets = a5xx_register_offsets;
1435 a5xx_gpu->lm_leakage = 0x4E001A;
1437 check_speed_bin(&pdev->dev);
1439 ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
1441 a5xx_destroy(&(a5xx_gpu->base.base));
1442 return ERR_PTR(ret);
1446 msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1448 /* Set up the preemption specific bits and pieces for each ringbuffer */
1449 a5xx_preempt_init(gpu);