1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
3  */
4
5 #include <linux/kernel.h>
6 #include <linux/types.h>
7 #include <linux/cpumask.h>
8 #include <linux/qcom_scm.h>
9 #include <linux/pm_opp.h>
10 #include <linux/nvmem-consumer.h>
11 #include <linux/slab.h>
12 #include "msm_gem.h"
13 #include "msm_mmu.h"
14 #include "a5xx_gpu.h"
15
16 extern bool hang_debug;
17 static void a5xx_dump(struct msm_gpu *gpu);
18
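/* Peripheral Authentication Service (PAS) ID used for the GPU zap shader SCM calls */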
19 #define GPU_PAS_ID 13
20
21 static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
22 {
23         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
24         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
25         uint32_t wptr;
26         unsigned long flags;
27
28         spin_lock_irqsave(&ring->lock, flags);
29
30         /* Copy the shadow to the actual register */
31         ring->cur = ring->next;
32
33         /* Make sure to wrap wptr if we need to */
34         wptr = get_wptr(ring);
35
36         spin_unlock_irqrestore(&ring->lock, flags);
37
38         /* Make sure everything is posted before making a decision */
39         mb();
40
41         /* Update HW if this is the current ring and we are not in preempt */
42         if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
43                 gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
44 }
45
46 static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
47         struct msm_file_private *ctx)
48 {
49         struct msm_drm_private *priv = gpu->dev->dev_private;
50         struct msm_ringbuffer *ring = submit->ring;
51         struct msm_gem_object *obj;
52         uint32_t *ptr, dwords;
53         unsigned int i, j;
54
55         for (i = 0; i < submit->nr_cmds; i++) {
56                 switch (submit->cmd[i].type) {
57                 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
58                         break;
59                 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
60                         if (priv->lastctx == ctx)
61                                 break;
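                        /* fall-thru */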
62                 case MSM_SUBMIT_CMD_BUF:
63                         /* copy commands into RB: */
64                         obj = submit->bos[submit->cmd[i].idx].obj;
65                         dwords = submit->cmd[i].size;
66
67                         ptr = msm_gem_get_vaddr(&obj->base);
68
69                         /* _get_vaddr() shouldn't fail at this point,
70                          * since we've already mapped it once in
71                          * submit_reloc()
72                          */
73                         if (WARN_ON(!ptr))
74                                 return;
75
76                         for (j = 0; j < dwords; j++) {
77                                 /* normally the OUT_PKTn() would wait
78                                  * for space for the packet.  But since
79                                  * we just OUT_RING() the whole thing,
80                                  * need to call adreno_wait_ring()
81                                  * ourself:
82                                  */
83                                 adreno_wait_ring(ring, 1);
84                                 OUT_RING(ring, ptr[j]);
85                         }
86
87                         msm_gem_put_vaddr(&obj->base);
88
89                         break;
90                 }
91         }
92
93         a5xx_flush(gpu, ring);
94         a5xx_preempt_trigger(gpu);
95
96         /* we might not necessarily have a cmd from userspace to
97          * trigger an event to know that submit has completed, so
98          * do this manually:
99          */
100         a5xx_idle(gpu, ring);
101         ring->memptrs->fence = submit->seqno;
102         msm_gpu_retire(gpu);
103 }
104
105 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
106         struct msm_file_private *ctx)
107 {
108         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
109         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
110         struct msm_drm_private *priv = gpu->dev->dev_private;
111         struct msm_ringbuffer *ring = submit->ring;
112         unsigned int i, ibs = 0;
113
114         if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
115                 priv->lastctx = NULL;
116                 a5xx_submit_in_rb(gpu, submit, ctx);
117                 return;
118         }
119
120         OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
121         OUT_RING(ring, 0x02);
122
123         /* Turn off protected mode to write to special registers */
124         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
125         OUT_RING(ring, 0);
126
127         /* Set the save preemption record for the ring/command */
128         OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
129         OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
130         OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
131
132         /* Turn back on protected mode */
133         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
134         OUT_RING(ring, 1);
135
136         /* Enable local preemption for finegrain preemption */
137         OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
138         OUT_RING(ring, 0x02);
139
140         /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
141         OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
142         OUT_RING(ring, 0x02);
143
144         /* Submit the commands */
145         for (i = 0; i < submit->nr_cmds; i++) {
146                 switch (submit->cmd[i].type) {
147                 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
148                         break;
149                 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
150                         if (priv->lastctx == ctx)
151                                 break;
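                        /* fall-thru */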
152                 case MSM_SUBMIT_CMD_BUF:
153                         OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
154                         OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
155                         OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
156                         OUT_RING(ring, submit->cmd[i].size);
157                         ibs++;
158                         break;
159                 }
160         }
161
162         /*
163          * Write the render mode to NULL (0) to indicate to the CP that the IBs
164          * are done rendering - otherwise a lucky preemption would start
165          * replaying from the last checkpoint
166          */
167         OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
168         OUT_RING(ring, 0);
169         OUT_RING(ring, 0);
170         OUT_RING(ring, 0);
171         OUT_RING(ring, 0);
172         OUT_RING(ring, 0);
173
174         /* Turn off IB level preemptions */
175         OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
176         OUT_RING(ring, 0x01);
177
178         /* Write the fence to the scratch register */
179         OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
180         OUT_RING(ring, submit->seqno);
181
182         /*
183          * Execute a CACHE_FLUSH_TS event. This will ensure that the
184          * timestamp is written to the memory and then triggers the interrupt
185          */
186         OUT_PKT7(ring, CP_EVENT_WRITE, 4);
187         OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
188         OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
189         OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
190         OUT_RING(ring, submit->seqno);
191
192         /* Yield the floor on command completion */
193         OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
194         /*
195          * If dword[2:1] are non zero, they specify an address for the CP to
196          * write the value of dword[3] to on preemption complete. Write 0 to
197          * skip the write
198          */
199         OUT_RING(ring, 0x00);
200         OUT_RING(ring, 0x00);
201         /* Data value - not used if the address above is 0 */
202         OUT_RING(ring, 0x01);
203         /* Set bit 0 to trigger an interrupt on preempt complete */
204         OUT_RING(ring, 0x01);
205
206         a5xx_flush(gpu, ring);
207
208         /* Check to see if we need to start preemption */
209         a5xx_preempt_trigger(gpu);
210 }
211
212 static const struct {
213         u32 offset;
214         u32 value;
215 } a5xx_hwcg[] = {
216         {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
217         {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
218         {REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
219         {REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
220         {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
221         {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
222         {REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
223         {REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
224         {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
225         {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
226         {REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
227         {REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
228         {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
229         {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
230         {REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
231         {REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
232         {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
233         {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
234         {REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
235         {REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
236         {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
237         {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
238         {REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
239         {REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
240         {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
241         {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
242         {REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
243         {REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
244         {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
245         {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
246         {REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
247         {REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
248         {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
249         {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
250         {REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
251         {REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
252         {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
253         {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
254         {REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
255         {REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
256         {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
257         {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
258         {REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
259         {REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
260         {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
261         {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
262         {REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
263         {REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
264         {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
265         {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
266         {REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
267         {REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
268         {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
269         {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
270         {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
271         {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
272         {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
273         {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
274         {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
275         {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
276         {REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
277         {REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
278         {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
279         {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
280         {REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
281         {REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
282         {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
283         {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
284         {REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
285         {REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
286         {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
287         {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
288         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
289         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
290         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
291         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
292         {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
293         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
294         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
295         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
296         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
297         {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
298         {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
299         {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
300         {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
301         {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
302         {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
303         {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
304         {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
305         {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
306         {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
307         {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
308 };
309
310 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
311 {
312         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
313         unsigned int i;
314
315         for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
316                 gpu_write(gpu, a5xx_hwcg[i].offset,
317                         state ? a5xx_hwcg[i].value : 0);
318
319         if (adreno_is_a540(adreno_gpu)) {
320                 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
321                 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
322         }
323
324         gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
325         gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
326 }
327
328 static int a5xx_me_init(struct msm_gpu *gpu)
329 {
330         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
331         struct msm_ringbuffer *ring = gpu->rb[0];
332
333         OUT_PKT7(ring, CP_ME_INIT, 8);
334
335         OUT_RING(ring, 0x0000002F);
336
337         /* Enable multiple hardware contexts */
338         OUT_RING(ring, 0x00000003);
339
340         /* Enable error detection */
341         OUT_RING(ring, 0x20000000);
342
343         /* Don't enable header dump */
344         OUT_RING(ring, 0x00000000);
345         OUT_RING(ring, 0x00000000);
346
347         /* Specify workarounds for various microcode issues */
348         if (adreno_is_a530(adreno_gpu)) {
349                 /* Workaround for token end syncs
350                  * Force a WFI after every direct-render 3D mode draw and every
351                  * 2D mode 3 draw
352                  */
353                 OUT_RING(ring, 0x0000000B);
354         } else {
355                 /* No workarounds enabled */
356                 OUT_RING(ring, 0x00000000);
357         }
358
359         OUT_RING(ring, 0x00000000);
360         OUT_RING(ring, 0x00000000);
361
362         gpu->funcs->flush(gpu, ring);
363         return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
364 }
365
366 static int a5xx_preempt_start(struct msm_gpu *gpu)
367 {
368         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
369         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
370         struct msm_ringbuffer *ring = gpu->rb[0];
371
372         if (gpu->nr_rings == 1)
373                 return 0;
374
375         /* Turn off protected mode to write to special registers */
376         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
377         OUT_RING(ring, 0);
378
379         /* Set the save preemption record for the ring/command */
380         OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
381         OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
382         OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
383
384         /* Turn back on protected mode */
385         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
386         OUT_RING(ring, 1);
387
388         OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
389         OUT_RING(ring, 0x00);
390
391         OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
392         OUT_RING(ring, 0x01);
393
394         OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
395         OUT_RING(ring, 0x01);
396
397         /* Yield the floor on command completion */
398         OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
399         OUT_RING(ring, 0x00);
400         OUT_RING(ring, 0x00);
401         OUT_RING(ring, 0x01);
402         OUT_RING(ring, 0x01);
403
404         gpu->funcs->flush(gpu, ring);
405
406         return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
407 }
408
409 static int a5xx_ucode_init(struct msm_gpu *gpu)
410 {
411         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
412         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
413         int ret;
414
415         if (!a5xx_gpu->pm4_bo) {
416                 a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
417                         adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
418
419
420                 if (IS_ERR(a5xx_gpu->pm4_bo)) {
421                         ret = PTR_ERR(a5xx_gpu->pm4_bo);
422                         a5xx_gpu->pm4_bo = NULL;
423                         DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
424                                 ret);
425                         return ret;
426                 }
427
428                 msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
429         }
430
431         if (!a5xx_gpu->pfp_bo) {
432                 a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
433                         adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
434
435                 if (IS_ERR(a5xx_gpu->pfp_bo)) {
436                         ret = PTR_ERR(a5xx_gpu->pfp_bo);
437                         a5xx_gpu->pfp_bo = NULL;
438                         DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
439                                 ret);
440                         return ret;
441                 }
442
443                 msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
444         }
445
446         gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
447                 REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
448
449         gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
450                 REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
451
452         return 0;
453 }
454
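/* Remote state value asking the secure world to resume the GPU zap shader */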
455 #define SCM_GPU_ZAP_SHADER_RESUME 0
456
457 static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
458 {
459         int ret;
460
461         ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
462         if (ret)
463                 DRM_ERROR("%s: zap-shader resume failed: %d\n",
464                         gpu->name, ret);
465
466         return ret;
467 }
468
469 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
470 {
471         static bool loaded;
472         int ret;
473
474         /*
475          * If the zap shader is already loaded into memory we just need to kick
476          * the remote processor to reinitialize it
477          */
478         if (loaded)
479                 return a5xx_zap_shader_resume(gpu);
480
481         ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
482
483         loaded = !ret;
484         return ret;
485 }
486
487 #define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
488           A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
489           A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
490           A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
491           A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
492           A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
493           A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
494           A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
495           A5XX_RBBM_INT_0_MASK_CP_SW | \
496           A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
497           A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
498           A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
499
500 static int a5xx_hw_init(struct msm_gpu *gpu)
501 {
502         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
503         int ret;
504
505         gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
506
507         if (adreno_is_a540(adreno_gpu))
508                 gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
509
510         /* Make all blocks contribute to the GPU BUSY perf counter */
511         gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
512
513         /* Enable RBBM error reporting bits */
514         gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
515
516         if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
517                 /*
518                  * Mask out the activity signals from RB1-3 to avoid false
519                  * positives
520                  */
521
522                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
523                         0xF0000000);
524                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
525                         0xFFFFFFFF);
526                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
527                         0xFFFFFFFF);
528                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
529                         0xFFFFFFFF);
530                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
531                         0xFFFFFFFF);
532                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
533                         0xFFFFFFFF);
534                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
535                         0xFFFFFFFF);
536                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
537                         0xFFFFFFFF);
538         }
539
540         /* Enable fault detection */
541         gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
542                 (1 << 30) | 0xFFFF);
543
544         /* Turn on performance counters */
545         gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
546
547         /* Select CP0 to always count cycles */
548         gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
549
550         /* Select RBBM0 to countable 6 to get the busy status for devfreq */
551         gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
552
553         /* Increase VFD cache access so LRZ and other data gets evicted less */
554         gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
555
556         /* Disable L2 bypass in the UCHE */
557         gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
558         gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
559         gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
560         gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
561
562         /* Set the GMEM VA range (0x00100000 to 0x00100000 + gpu->gmem - 1) */
563         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
564         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
565         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
566                 0x00100000 + adreno_gpu->gmem - 1);
567         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
568
569         gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
570         if (adreno_is_a530(adreno_gpu))
571                 gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
572         if (adreno_is_a540(adreno_gpu))
573                 gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
574         gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
575         gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
576
577         gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22));
578
579         if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
580                 gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
581
582         gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);
583
584         /* Enable USE_RETENTION_FLOPS */
585         gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
586
587         /* Enable ME/PFP split notification */
588         gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
589
590         /* Enable HWCG */
591         a5xx_set_hwcg(gpu, true);
592
593         gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
594
595         /* Set the highest bank bit */
596         gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
597         gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
598         if (adreno_is_a540(adreno_gpu))
599                 gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2);
600
601         /* Protect registers from the CP */
602         gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
603
604         /* RBBM */
605         gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
606         gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
607         gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
608         gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
609         gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
610         gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
611
612         /* Content protect */
613         gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
614                 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
615                         16));
616         gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
617                 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
618
619         /* CP */
620         gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
621         gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
622         gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
623         gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
624
625         /* RB */
626         gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
627         gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
628
629         /* VPC */
630         gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
631         gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
632
633         /* UCHE */
634         gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
635
636         if (adreno_is_a530(adreno_gpu))
637                 gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
638                         ADRENO_PROTECT_RW(0x10000, 0x8000));
639
640         gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
641         /*
642          * Disable the trusted memory range - we don't actually support secure
643          * memory rendering at this point in time and we don't want to block off
644          * part of the virtual memory space.
645          */
646         gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
647                 REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
648         gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
649
650         /* Put the GPU into 64 bit by default */
651         gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
652         gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
653         gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
654         gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
655         gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
656         gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
657         gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
658         gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
659         gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
660         gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
661         gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
662         gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
663
664         /*
665          * A VPC corner case with the local memory load/kill path can lead to
666          * corrupt internal state. The normal disable does not work for all a5xx
667          * chips, so apply the following settings to disable it.
668          */
669         if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
670                 gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
671                 gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
672         }
673
674         ret = adreno_hw_init(gpu);
675         if (ret)
676                 return ret;
677
678         a5xx_preempt_hw_init(gpu);
679
680         a5xx_gpmu_ucode_init(gpu);
681
682         ret = a5xx_ucode_init(gpu);
683         if (ret)
684                 return ret;
685
686         /* Set the interrupt mask used through the initial bringup stage */
687         gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
688
689         /* Clear ME_HALT to start the micro engine */
690         gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
691         ret = a5xx_me_init(gpu);
692         if (ret)
693                 return ret;
694
695         ret = a5xx_power_init(gpu);
696         if (ret)
697                 return ret;
698
699         /*
700          * Send a pipeline event stat to get misbehaving counters to start
701          * ticking correctly
702          */
703         if (adreno_is_a530(adreno_gpu)) {
704                 OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
705                 OUT_RING(gpu->rb[0], 0x0F);
706
707                 gpu->funcs->flush(gpu, gpu->rb[0]);
708                 if (!a5xx_idle(gpu, gpu->rb[0]))
709                         return -EINVAL;
710         }
711
712         /*
713          * Try to load a zap shader into the secure world. If successful
714          * we can use the CP to switch out of secure mode. If not then we
715          * have no recourse but to try to switch ourselves out manually. If we
716          * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
717          * be blocked and a permissions violation will soon follow.
718          */
719         ret = a5xx_zap_shader_init(gpu);
720         if (!ret) {
721                 OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
722                 OUT_RING(gpu->rb[0], 0x00000000);
723
724                 gpu->funcs->flush(gpu, gpu->rb[0]);
725                 if (!a5xx_idle(gpu, gpu->rb[0]))
726                         return -EINVAL;
727         } else {
728                 /* Print a warning so if we die, we know why */
729                 dev_warn_once(gpu->dev->dev,
730                         "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
731                 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
732         }
733
734         /* Last step - yield the ringbuffer */
735         a5xx_preempt_start(gpu);
736
737         return 0;
738 }
739
740 static void a5xx_recover(struct msm_gpu *gpu)
741 {
742         int i;
743
744         adreno_dump_info(gpu);
745
746         for (i = 0; i < 8; i++) {
747                 printk("CP_SCRATCH_REG%d: %u\n", i,
748                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
749         }
750
751         if (hang_debug)
752                 a5xx_dump(gpu);
753
754         gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
755         gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
756         gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
757         adreno_recover(gpu);
758 }
759
760 static void a5xx_destroy(struct msm_gpu *gpu)
761 {
762         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
763         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
764
765         DBG("%s", gpu->name);
766
767         a5xx_preempt_fini(gpu);
768
769         if (a5xx_gpu->pm4_bo) {
770                 msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
771                 drm_gem_object_put_unlocked(a5xx_gpu->pm4_bo);
772         }
773
774         if (a5xx_gpu->pfp_bo) {
775                 msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
776                 drm_gem_object_put_unlocked(a5xx_gpu->pfp_bo);
777         }
778
779         if (a5xx_gpu->gpmu_bo) {
780                 msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
781                 drm_gem_object_put_unlocked(a5xx_gpu->gpmu_bo);
782         }
783
784         adreno_gpu_cleanup(adreno_gpu);
785         kfree(a5xx_gpu);
786 }
787
788 static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
789 {
790         if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
791                 return false;
792
793         /*
794          * Nearly every abnormality ends up pausing the GPU and triggering a
795          * fault so we can safely just watch for this one interrupt to fire
796          */
797         return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
798                 A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
799 }
800
801 bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
802 {
803         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
804         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
805
806         if (ring != a5xx_gpu->cur_ring) {
807                 WARN(1, "Tried to idle a non-current ringbuffer\n");
808                 return false;
809         }
810
811         /* wait for CP to drain ringbuffer: */
812         if (!adreno_idle(gpu, ring))
813                 return false;
814
815         if (spin_until(_a5xx_check_idle(gpu))) {
816                 DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
817                         gpu->name, __builtin_return_address(0),
818                         gpu_read(gpu, REG_A5XX_RBBM_STATUS),
819                         gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
820                         gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
821                         gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
822                 return false;
823         }
824
825         return true;
826 }
827
828 static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
829 {
830         struct msm_gpu *gpu = arg;
831         pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
832                         iova, flags,
833                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
834                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
835                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
836                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
837
838         return -EFAULT;
839 }
840
841 static void a5xx_cp_err_irq(struct msm_gpu *gpu)
842 {
843         u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
844
845         if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
846                 u32 val;
847
848                 gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
849
850                 /*
851                  * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
852                  * read it twice
853                  */
854
855                 gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
856                 val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
857
858                 dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
859                         val);
860         }
861
862         if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
863                 dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
864                         gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
865
866         if (status & A5XX_CP_INT_CP_DMA_ERROR)
867                 dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
868
869         if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
870                 u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
871
872                 dev_err_ratelimited(gpu->dev->dev,
873                         "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
874                         val & (1 << 24) ? "WRITE" : "READ",
875                         (val & 0xFFFFF) >> 2, val);
876         }
877
878         if (status & A5XX_CP_INT_CP_AHB_ERROR) {
879                 u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
880                 const char *access[16] = { "reserved", "reserved",
881                         "timestamp lo", "timestamp hi", "pfp read", "pfp write",
882                         "", "", "me read", "me write", "", "", "crashdump read",
883                         "crashdump write" };
884
885                 dev_err_ratelimited(gpu->dev->dev,
886                         "CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
887                         status & 0xFFFFF, access[(status >> 24) & 0xF],
888                         (status & (1 << 31)), status);
889         }
890 }
891
892 static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
893 {
894         if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
895                 u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
896
897                 dev_err_ratelimited(gpu->dev->dev,
898                         "RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
899                         val & (1 << 28) ? "WRITE" : "READ",
900                         (val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
901                         (val >> 24) & 0xF);
902
903                 /* Clear the error */
904                 gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
905
906                 /* Clear the interrupt */
907                 gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
908                         A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
909         }
910
911         if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
912                 dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
913
914         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
915                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
916                         gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
917
918         if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
919                 dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
920                         gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
921
922         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
923                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
924                         gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
925
926         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
927                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
928
929         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
930                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
931 }
932
933 static void a5xx_uche_err_irq(struct msm_gpu *gpu)
934 {
935         uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
936
937         addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
938
939         dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
940                 addr);
941 }
942
943 static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
944 {
945         dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
946 }
947
948 static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
949 {
950         struct drm_device *dev = gpu->dev;
951         struct msm_drm_private *priv = dev->dev_private;
952         struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
953
954         DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
955                 ring ? ring->id : -1, ring ? ring->seqno : 0,
956                 gpu_read(gpu, REG_A5XX_RBBM_STATUS),
957                 gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
958                 gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
959                 gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
960                 gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
961                 gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
962                 gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
963
964         /* Turn off the hangcheck timer to keep it from bothering us */
965         del_timer(&gpu->hangcheck_timer);
966
967         queue_work(priv->wq, &gpu->recover_work);
968 }
969
970 #define RBBM_ERROR_MASK \
971         (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
972         A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
973         A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
974         A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
975         A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
976         A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
977
978 static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
979 {
980         u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
981
982         /*
983          * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
984          * before the source is cleared the interrupt will storm.
985          */
986         gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
987                 status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
988
989         /* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
990         if (status & RBBM_ERROR_MASK)
991                 a5xx_rbbm_err_irq(gpu, status);
992
993         if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
994                 a5xx_cp_err_irq(gpu);
995
996         if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
997                 a5xx_fault_detect_irq(gpu);
998
999         if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1000                 a5xx_uche_err_irq(gpu);
1001
1002         if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1003                 a5xx_gpmu_err_irq(gpu);
1004
1005         if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1006                 a5xx_preempt_trigger(gpu);
1007                 msm_gpu_retire(gpu);
1008         }
1009
1010         if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1011                 a5xx_preempt_irq(gpu);
1012
1013         return IRQ_HANDLED;
1014 }
1015
1016 static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
1017         REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
1018         REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
1019         REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
1020         REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
1021                 REG_A5XX_CP_RB_RPTR_ADDR_HI),
1022         REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
1023         REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
1024         REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
1025 };
1026
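/*
 * Register ranges (inclusive start/end pairs, terminated by ~0) that the
 * adreno core dumps for debugfs and crash state.
 */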
1027 static const u32 a5xx_registers[] = {
1028         0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1029         0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1030         0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1031         0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1032         0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1033         0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1034         0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1035         0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1036         0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1037         0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1038         0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1039         0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1040         0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1041         0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1042         0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1043         0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1044         0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1045         0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1046         0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1047         0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1048         0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1049         0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1050         0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1051         0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1052         0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1053         0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
1054         0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
1055         0xAC60, 0xAC60, ~0,
1056 };
1057
1058 static void a5xx_dump(struct msm_gpu *gpu)
1059 {
1060         DRM_DEV_INFO(gpu->dev->dev, "status:   %08x\n",
1061                 gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1062         adreno_dump(gpu);
1063 }
1064
1065 static int a5xx_pm_resume(struct msm_gpu *gpu)
1066 {
1067         int ret;
1068
1069         /* Turn on the core power */
1070         ret = msm_gpu_pm_resume(gpu);
1071         if (ret)
1072                 return ret;
1073
1074         /* Turn on the RBCCU domain first to limit the chances of voltage droop */
1075         gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1076
1077         /* Wait 3 usecs before polling */
1078         udelay(3);
1079
1080         ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1081                 (1 << 20), (1 << 20));
1082         if (ret) {
1083                 DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1084                         gpu->name,
1085                         gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1086                 return ret;
1087         }
1088
1089         /* Turn on the SP domain */
1090         gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1091         ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1092                 (1 << 20), (1 << 20));
1093         if (ret)
1094                 DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1095                         gpu->name);
1096
1097         return ret;
1098 }
1099
1100 static int a5xx_pm_suspend(struct msm_gpu *gpu)
1101 {
1102         /* Clear the VBIF pipe before shutting down */
1103         gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF);
1104         spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF);
1105
1106         gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1107
1108         /*
1109          * Reset the VBIF before power collapse to avoid issues with FIFO
1110          * entries
1111          */
1112         gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1113         gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1114
1115         return msm_gpu_pm_suspend(gpu);
1116 }
1117
1118 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1119 {
1120         *value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
1121                 REG_A5XX_RBBM_PERFCTR_CP_0_HI);
1122
1123         return 0;
1124 }
1125
1126 struct a5xx_crashdumper {
1127         void *ptr;
1128         struct drm_gem_object *bo;
1129         u64 iova;
1130 };
1131
1132 struct a5xx_gpu_state {
1133         struct msm_gpu_state base;
1134         u32 *hlsqregs;
1135 };
1136
1137 static int a5xx_crashdumper_init(struct msm_gpu *gpu,
1138                 struct a5xx_crashdumper *dumper)
1139 {
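        /*
         * Allocate 1MB for the crashdumper: the capture script is written at
         * offset 0 and the captured register data lands at offset 256K (see
         * a5xx_gpu_state_get_hlsq_regs()).
         */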
1140         dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
1141                 SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
1142                 &dumper->bo, &dumper->iova);
1143
1144         if (!IS_ERR(dumper->ptr))
1145                 msm_gem_object_set_name(dumper->bo, "crashdump");
1146
1147         return PTR_ERR_OR_ZERO(dumper->ptr);
1148 }
1149
1150 static int a5xx_crashdumper_run(struct msm_gpu *gpu,
1151                 struct a5xx_crashdumper *dumper)
1152 {
1153         u32 val;
1154
1155         if (IS_ERR_OR_NULL(dumper->ptr))
1156                 return -EINVAL;
1157
1158         gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
1159                 REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
1160
1161         gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
1162
1163         return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
1164                 val & 0x04, 100, 10000);
1165 }
1166
1167 /*
1168  * This is the list of registers that need to be read through the HLSQ
1169  * aperture by the crashdumper.  They are not normally accessible from
1170  * the CPU on a secure platform.
1171  */
1172 static const struct {
1173         u32 type;
1174         u32 regoffset;
1175         u32 count;
1176 } a5xx_hlsq_aperture_regs[] = {
1177         { 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
1178         { 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
1179         { 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
1180         { 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
1181         { 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
1182         { 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
1183         { 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
1184         { 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
1185         { 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
1186         { 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
1187         { 0x3a, 0x0f00, 0x1c },  /* TP non-context */
1188         { 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
1189         { 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
1190         { 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
1191         { 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
1192 };
1193
1194 static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
1195                 struct a5xx_gpu_state *a5xx_state)
1196 {
1197         struct a5xx_crashdumper dumper = { 0 };
1198         u32 offset, count = 0;
1199         u64 *ptr;
1200         int i;
1201
1202         if (a5xx_crashdumper_init(gpu, &dumper))
1203                 return;
1204
1205         /* The script will be written at offset 0 */
1206         ptr = dumper.ptr;
1207
1208         /* Start writing the data at offset 256k */
1209         offset = dumper.iova + (256 * SZ_1K);
1210
1211         /* Count how many additional registers to get from the HLSQ aperture */
1212         for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
1213                 count += a5xx_hlsq_aperture_regs[i].count;
1214
1215         a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1216         if (!a5xx_state->hlsqregs)
1217                 return;
1218
1219         /* Build the crashdump script */
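        /*
         * Each entry is two pairs of 64-bit words: the first pair writes the
         * bank select value to HLSQ_DBG_READ_SEL, the second pair has the
         * crashdumper copy 'count' dwords from the AHB read aperture into the
         * buffer at 'offset'.
         */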
1220         for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1221                 u32 type = a5xx_hlsq_aperture_regs[i].type;
1222                 u32 c = a5xx_hlsq_aperture_regs[i].count;
1223
1224                 /* Write the register to select the desired bank */
1225                 *ptr++ = ((u64) type << 8);
1226                 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
1227                         (1 << 21) | 1;
1228
1229                 *ptr++ = offset;
1230                 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
1231                         | c;
1232
1233                 offset += c * sizeof(u32);
1234         }
1235
1236         /* Write two zeros to close off the script */
1237         *ptr++ = 0;
1238         *ptr++ = 0;
1239
1240         if (a5xx_crashdumper_run(gpu, &dumper)) {
1241                 kfree(a5xx_state->hlsqregs);
1242                 msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1243                 return;
1244         }
1245
1246         /* Copy the data from the crashdumper to the state */
1247         memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
1248                 count * sizeof(u32));
1249
1250         msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1251 }
1252
1253 static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
1254 {
1255         struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
1256                         GFP_KERNEL);
1257
1258         if (!a5xx_state)
1259                 return ERR_PTR(-ENOMEM);
1260
1261         /* Temporarily disable hardware clock gating before reading the hw */
1262         a5xx_set_hwcg(gpu, false);
1263
1264         /* First get the generic state from the adreno core */
1265         adreno_gpu_state_get(gpu, &(a5xx_state->base));
1266
1267         a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
1268
1269         /* Get the HLSQ regs with the help of the crashdumper */
1270         a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
1271
1272         a5xx_set_hwcg(gpu, true);
1273
1274         return &a5xx_state->base;
1275 }
1276
1277 static void a5xx_gpu_state_destroy(struct kref *kref)
1278 {
1279         struct msm_gpu_state *state = container_of(kref,
1280                 struct msm_gpu_state, ref);
1281         struct a5xx_gpu_state *a5xx_state = container_of(state,
1282                 struct a5xx_gpu_state, base);
1283
1284         kfree(a5xx_state->hlsqregs);
1285
1286         adreno_gpu_state_destroy(state);
1287         kfree(a5xx_state);
1288 }
1289
1290 int a5xx_gpu_state_put(struct msm_gpu_state *state)
1291 {
1292         if (IS_ERR_OR_NULL(state))
1293                 return 1;
1294
1295         return kref_put(&state->ref, a5xx_gpu_state_destroy);
1296 }
1297
1298
1299 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1300 void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1301                 struct drm_printer *p)
1302 {
1303         int i, j;
1304         u32 pos = 0;
1305         struct a5xx_gpu_state *a5xx_state = container_of(state,
1306                 struct a5xx_gpu_state, base);
1307
1308         if (IS_ERR_OR_NULL(state))
1309                 return;
1310
1311         adreno_show(gpu, state, p);
1312
1313         /* Dump the additional a5xx HLSQ registers */
1314         if (!a5xx_state->hlsqregs)
1315                 return;
1316
1317         drm_printf(p, "registers-hlsq:\n");
1318
1319         for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1320                 u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
1321                 u32 c = a5xx_hlsq_aperture_regs[i].count;
1322
1323                 for (j = 0; j < c; j++, pos++, o++) {
1324                         /*
1325                          * To keep the crashdump simple we pull the entire range
1326                          * for each register type but not all of the registers
1327                          * in the range are valid. Fortunately invalid registers
1328                          * stick out like a sore thumb with a value of
1329                          * 0xdeadbeef
1330                          */
1331                         if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
1332                                 continue;
1333
1334                         drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
1335                                 o << 2, a5xx_state->hlsqregs[pos]);
1336                 }
1337         }
1338 }
1339 #endif
1340
1341 static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1342 {
1343         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1344         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1345
1346         return a5xx_gpu->cur_ring;
1347 }
1348
1349 static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
1350 {
1351         u64 busy_cycles, busy_time;
1352
1353         busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
1354                         REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
1355
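        /* Busy time is returned in microseconds: cycle delta divided by the core clock rate in MHz */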
1356         busy_time = busy_cycles - gpu->devfreq.busy_cycles;
1357         do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);
1358
1359         gpu->devfreq.busy_cycles = busy_cycles;
1360
1361         if (WARN_ON(busy_time > ~0LU))
1362                 return ~0LU;
1363
1364         return (unsigned long)busy_time;
1365 }
1366
1367 static const struct adreno_gpu_funcs funcs = {
1368         .base = {
1369                 .get_param = adreno_get_param,
1370                 .hw_init = a5xx_hw_init,
1371                 .pm_suspend = a5xx_pm_suspend,
1372                 .pm_resume = a5xx_pm_resume,
1373                 .recover = a5xx_recover,
1374                 .submit = a5xx_submit,
1375                 .flush = a5xx_flush,
1376                 .active_ring = a5xx_active_ring,
1377                 .irq = a5xx_irq,
1378                 .destroy = a5xx_destroy,
1379 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1380                 .show = a5xx_show,
1381 #endif
1382 #if defined(CONFIG_DEBUG_FS)
1383                 .debugfs_init = a5xx_debugfs_init,
1384 #endif
1385                 .gpu_busy = a5xx_gpu_busy,
1386                 .gpu_state_get = a5xx_gpu_state_get,
1387                 .gpu_state_put = a5xx_gpu_state_put,
1388         },
1389         .get_timestamp = a5xx_get_timestamp,
1390 };
1391
1392 static void check_speed_bin(struct device *dev)
1393 {
1394         struct nvmem_cell *cell;
1395         u32 *buf, bin, val;
1396
1397         cell = nvmem_cell_get(dev, "speed_bin");
1398
1399         /* If an nvmem cell isn't defined, nothing to do */
1400         if (IS_ERR(cell))
1401                 return;
1402
1403         buf = nvmem_cell_read(cell, NULL);
1404         nvmem_cell_put(cell);

             /* nvmem_cell_read() returns a kmalloc'd buffer that we must check and free */
             if (IS_ERR(buf))
                     return;

             bin = *buf;
             kfree(buf);
1405
1406         val = (1 << bin);
1407
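        /* Restrict the OPP table to entries whose opp-supported-hw mask matches this bin */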
1408         dev_pm_opp_set_supported_hw(dev, &val, 1);
1409 }
1410
1411 struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1412 {
1413         struct msm_drm_private *priv = dev->dev_private;
1414         struct platform_device *pdev = priv->gpu_pdev;
1415         struct a5xx_gpu *a5xx_gpu = NULL;
1416         struct adreno_gpu *adreno_gpu;
1417         struct msm_gpu *gpu;
1418         int ret;
1419
1420         if (!pdev) {
1421                 DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
1422                 return ERR_PTR(-ENXIO);
1423         }
1424
1425         a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1426         if (!a5xx_gpu)
1427                 return ERR_PTR(-ENOMEM);
1428
1429         adreno_gpu = &a5xx_gpu->base;
1430         gpu = &adreno_gpu->base;
1431
1432         adreno_gpu->registers = a5xx_registers;
1433         adreno_gpu->reg_offsets = a5xx_register_offsets;
1434
1435         a5xx_gpu->lm_leakage = 0x4E001A;
1436
1437         check_speed_bin(&pdev->dev);
1438
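        /* Request four ringbuffers; a5xx uses one ring per priority level for preemption */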
1439         ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
1440         if (ret) {
1441                 a5xx_destroy(&(a5xx_gpu->base.base));
1442                 return ERR_PTR(ret);
1443         }
1444
1445         if (gpu->aspace)
1446                 msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1447
1448         /* Set up the preemption specific bits and pieces for each ringbuffer */
1449         a5xx_preempt_init(gpu);
1450
1451         return gpu;
1452 }