1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2017-2019 The Linux Foundation. All rights reserved. */
3
4
5 #include "msm_gem.h"
6 #include "msm_mmu.h"
7 #include "msm_gpu_trace.h"
8 #include "a6xx_gpu.h"
9 #include "a6xx_gmu.xml.h"
10
11 #include <linux/bitfield.h>
12 #include <linux/devfreq.h>
13 #include <linux/reset.h>
14 #include <linux/soc/qcom/llcc-qcom.h>
15
16 #define GPU_PAS_ID 13
17
18 static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
19 {
20         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
21         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
22
23         /* Check that the GMU is idle */
24         if (!a6xx_gmu_isidle(&a6xx_gpu->gmu))
25                 return false;
26
27         /* Check that the CX master is idle */
28         if (gpu_read(gpu, REG_A6XX_RBBM_STATUS) &
29                         ~A6XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER)
30                 return false;
31
32         return !(gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS) &
33                 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT);
34 }
35
36 static bool a6xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
37 {
38         /* wait for CP to drain ringbuffer: */
39         if (!adreno_idle(gpu, ring))
40                 return false;
41
42         if (spin_until(_a6xx_check_idle(gpu))) {
43                 DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
44                         gpu->name, __builtin_return_address(0),
45                         gpu_read(gpu, REG_A6XX_RBBM_STATUS),
46                         gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS),
47                         gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
48                         gpu_read(gpu, REG_A6XX_CP_RB_WPTR));
49                 return false;
50         }
51
52         return true;
53 }
54
55 static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
56 {
57         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
58         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
59
60         /* Expanded APRIV doesn't need to issue the WHERE_AM_I opcode */
61         if (a6xx_gpu->has_whereami && !adreno_gpu->base.hw_apriv) {
62                 OUT_PKT7(ring, CP_WHERE_AM_I, 2);
63                 OUT_RING(ring, lower_32_bits(shadowptr(a6xx_gpu, ring)));
64                 OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring)));
65         }
66 }
67
68 static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
69 {
70         uint32_t wptr;
71         unsigned long flags;
72
73         update_shadow_rptr(gpu, ring);
74
75         spin_lock_irqsave(&ring->preempt_lock, flags);
76
77         /* Copy the shadow to the actual register */
78         ring->cur = ring->next;
79
80         /* Make sure to wrap wptr if we need to */
81         wptr = get_wptr(ring);
82
83         spin_unlock_irqrestore(&ring->preempt_lock, flags);
84
85         /* Make sure everything is posted before making a decision */
86         mb();
87
88         gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
89 }
90
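/*
 * Ask the CP to copy a 64-bit counter register pair into GPU memory at
 * 'iova'; used in a6xx_submit() to sample the CP cycle and always-on
 * counters before and after the command stream.
 */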
91 static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter,
92                 u64 iova)
93 {
94         OUT_PKT7(ring, CP_REG_TO_MEM, 3);
95         OUT_RING(ring, CP_REG_TO_MEM_0_REG(counter) |
96                 CP_REG_TO_MEM_0_CNT(2) |
97                 CP_REG_TO_MEM_0_64B);
98         OUT_RING(ring, lower_32_bits(iova));
99         OUT_RING(ring, upper_32_bits(iova));
100 }
101
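/*
 * Emit the CP commands that switch the GPU to the submitting context's
 * page table (TTBR0/ASID) when it differs from the currently installed
 * one; this includes a UCHE cache invalidate and, unless system profiling
 * is active, a perfcounter SRAM clear around the switch.
 */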
102 static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
103                 struct msm_ringbuffer *ring, struct msm_file_private *ctx)
104 {
105         bool sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;
106         phys_addr_t ttbr;
107         u32 asid;
108         u64 memptr = rbmemptr(ring, ttbr0);
109
110         if (ctx->seqno == a6xx_gpu->base.base.cur_ctx_seqno)
111                 return;
112
113         if (msm_iommu_pagetable_params(ctx->aspace->mmu, &ttbr, &asid))
114                 return;
115
116         if (!sysprof) {
117                 /* Turn off protected mode to write to special registers */
118                 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
119                 OUT_RING(ring, 0);
120
121                 OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
122                 OUT_RING(ring, 1);
123         }
124
125         /* Execute the table update */
126         OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4);
127         OUT_RING(ring, CP_SMMU_TABLE_UPDATE_0_TTBR0_LO(lower_32_bits(ttbr)));
128
129         OUT_RING(ring,
130                 CP_SMMU_TABLE_UPDATE_1_TTBR0_HI(upper_32_bits(ttbr)) |
131                 CP_SMMU_TABLE_UPDATE_1_ASID(asid));
132         OUT_RING(ring, CP_SMMU_TABLE_UPDATE_2_CONTEXTIDR(0));
133         OUT_RING(ring, CP_SMMU_TABLE_UPDATE_3_CONTEXTBANK(0));
134
135         /*
136          * Write the new TTBR0 to the memstore. This is good for debugging.
137          */
138         OUT_PKT7(ring, CP_MEM_WRITE, 4);
139         OUT_RING(ring, CP_MEM_WRITE_0_ADDR_LO(lower_32_bits(memptr)));
140         OUT_RING(ring, CP_MEM_WRITE_1_ADDR_HI(upper_32_bits(memptr)));
141         OUT_RING(ring, lower_32_bits(ttbr));
142         OUT_RING(ring, (asid << 16) | upper_32_bits(ttbr));
143
144         /*
145          * And finally, trigger a uche flush to be sure there isn't anything
146          * lingering in that part of the GPU
147          */
148
149         OUT_PKT7(ring, CP_EVENT_WRITE, 1);
150         OUT_RING(ring, CACHE_INVALIDATE);
151
152         if (!sysprof) {
153                 /*
154                  * Wait for SRAM clear after the pgtable update, so the
155                  * two can happen in parallel:
156                  */
157                 OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
158                 OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ));
159                 OUT_RING(ring, CP_WAIT_REG_MEM_1_POLL_ADDR_LO(
160                                 REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS));
161                 OUT_RING(ring, CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0));
162                 OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1));
163                 OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1));
164                 OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0));
165
166                 /* Re-enable protected mode: */
167                 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
168                 OUT_RING(ring, 1);
169         }
170 }
171
172 static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
173 {
174         unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
175         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
176         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
177         struct msm_ringbuffer *ring = submit->ring;
178         unsigned int i, ibs = 0;
179
180         a6xx_set_pagetable(a6xx_gpu, ring, submit->queue->ctx);
181
182         get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
183                 rbmemptr_stats(ring, index, cpcycles_start));
184
185         /*
186          * For PM4 the GMU register offsets are calculated from the base of the
187          * GPU registers so we need to add 0x1a800 to the register value on A630
188          * to get the right value from PM4.
189          */
190         get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
191                 rbmemptr_stats(ring, index, alwayson_start));
192
193         /* Invalidate CCU depth and color */
194         OUT_PKT7(ring, CP_EVENT_WRITE, 1);
195         OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_DEPTH));
196
197         OUT_PKT7(ring, CP_EVENT_WRITE, 1);
198         OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_COLOR));
199
200         /* Submit the commands */
201         for (i = 0; i < submit->nr_cmds; i++) {
202                 switch (submit->cmd[i].type) {
203                 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
204                         break;
205                 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
206                         if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
207                                 break;
208                         fallthrough;
209                 case MSM_SUBMIT_CMD_BUF:
210                         OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
211                         OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
212                         OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
213                         OUT_RING(ring, submit->cmd[i].size);
214                         ibs++;
215                         break;
216                 }
217
218                 /*
219                  * Periodically update shadow-wptr if needed, so that we
220                  * can see partial progress of submits with large # of
221                  * cmds.. otherwise we could needlessly stall waiting for
222                  * ringbuffer state, simply due to looking at a shadow
223                  * rptr value that has not been updated
224                  */
225                 if ((ibs % 32) == 0)
226                         update_shadow_rptr(gpu, ring);
227         }
228
229         get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
230                 rbmemptr_stats(ring, index, cpcycles_end));
231         get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
232                 rbmemptr_stats(ring, index, alwayson_end));
233
234         /* Write the fence to the scratch register */
235         OUT_PKT4(ring, REG_A6XX_CP_SCRATCH_REG(2), 1);
236         OUT_RING(ring, submit->seqno);
237
238         /*
239          * Execute a CACHE_FLUSH_TS event. This will ensure that the
240          * timestamp is written to the memory and then triggers the interrupt
241          */
242         OUT_PKT7(ring, CP_EVENT_WRITE, 4);
243         OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
244                 CP_EVENT_WRITE_0_IRQ);
245         OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
246         OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
247         OUT_RING(ring, submit->seqno);
248
249         trace_msm_gpu_submit_flush(submit,
250                 gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO));
251
252         a6xx_flush(gpu, ring);
253 }
254
255 /* For a615 family (a615, a616, a618 and a619) */
256 const struct adreno_reglist a615_hwcg[] = {
257         {REG_A6XX_RBBM_CLOCK_CNTL_SP0,  0x02222222},
258         {REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
259         {REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
260         {REG_A6XX_RBBM_CLOCK_HYST_SP0,  0x0000F3CF},
261         {REG_A6XX_RBBM_CLOCK_CNTL_TP0,  0x02222222},
262         {REG_A6XX_RBBM_CLOCK_CNTL_TP1,  0x02222222},
263         {REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
264         {REG_A6XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
265         {REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
266         {REG_A6XX_RBBM_CLOCK_CNTL3_TP1, 0x22222222},
267         {REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
268         {REG_A6XX_RBBM_CLOCK_CNTL4_TP1, 0x00022222},
269         {REG_A6XX_RBBM_CLOCK_HYST_TP0,  0x77777777},
270         {REG_A6XX_RBBM_CLOCK_HYST_TP1,  0x77777777},
271         {REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
272         {REG_A6XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
273         {REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
274         {REG_A6XX_RBBM_CLOCK_HYST3_TP1, 0x77777777},
275         {REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
276         {REG_A6XX_RBBM_CLOCK_HYST4_TP1, 0x00077777},
277         {REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
278         {REG_A6XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
279         {REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
280         {REG_A6XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
281         {REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
282         {REG_A6XX_RBBM_CLOCK_DELAY3_TP1, 0x11111111},
283         {REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
284         {REG_A6XX_RBBM_CLOCK_DELAY4_TP1, 0x00011111},
285         {REG_A6XX_RBBM_CLOCK_CNTL_UCHE,  0x22222222},
286         {REG_A6XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
287         {REG_A6XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
288         {REG_A6XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
289         {REG_A6XX_RBBM_CLOCK_HYST_UCHE,  0x00000004},
290         {REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
291         {REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
292         {REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x00002222},
293         {REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002020},
294         {REG_A6XX_RBBM_CLOCK_CNTL_CCU1, 0x00002220},
295         {REG_A6XX_RBBM_CLOCK_CNTL_CCU2, 0x00002220},
296         {REG_A6XX_RBBM_CLOCK_CNTL_CCU3, 0x00002220},
297         {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
298         {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU1, 0x00040F00},
299         {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU2, 0x00040F00},
300         {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU3, 0x00040F00},
301         {REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x05022022},
302         {REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
303         {REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
304         {REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
305         {REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
306         {REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
307         {REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
308         {REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
309         {REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
310         {REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
311         {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
312         {REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
313         {REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
314         {REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
315         {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
316         {REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
317         {REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
318         {REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
319         {REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
320         {},
321 };
322
323 const struct adreno_reglist a630_hwcg[] = {
324         {REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x22222222},
325         {REG_A6XX_RBBM_CLOCK_CNTL_SP1, 0x22222222},
326         {REG_A6XX_RBBM_CLOCK_CNTL_SP2, 0x22222222},
327         {REG_A6XX_RBBM_CLOCK_CNTL_SP3, 0x22222222},
328         {REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02022220},
329         {REG_A6XX_RBBM_CLOCK_CNTL2_SP1, 0x02022220},
330         {REG_A6XX_RBBM_CLOCK_CNTL2_SP2, 0x02022220},
331         {REG_A6XX_RBBM_CLOCK_CNTL2_SP3, 0x02022220},
332         {REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
333         {REG_A6XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
334         {REG_A6XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
335         {REG_A6XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
336         {REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000f3cf},
337         {REG_A6XX_RBBM_CLOCK_HYST_SP1, 0x0000f3cf},
338         {REG_A6XX_RBBM_CLOCK_HYST_SP2, 0x0000f3cf},
339         {REG_A6XX_RBBM_CLOCK_HYST_SP3, 0x0000f3cf},
340         {REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222},
341         {REG_A6XX_RBBM_CLOCK_CNTL_TP1, 0x02222222},
342         {REG_A6XX_RBBM_CLOCK_CNTL_TP2, 0x02222222},
343         {REG_A6XX_RBBM_CLOCK_CNTL_TP3, 0x02222222},
344         {REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
345         {REG_A6XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
346         {REG_A6XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
347         {REG_A6XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
348         {REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
349         {REG_A6XX_RBBM_CLOCK_CNTL3_TP1, 0x22222222},
350         {REG_A6XX_RBBM_CLOCK_CNTL3_TP2, 0x22222222},
351         {REG_A6XX_RBBM_CLOCK_CNTL3_TP3, 0x22222222},
352         {REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
353         {REG_A6XX_RBBM_CLOCK_CNTL4_TP1, 0x00022222},
354         {REG_A6XX_RBBM_CLOCK_CNTL4_TP2, 0x00022222},
355         {REG_A6XX_RBBM_CLOCK_CNTL4_TP3, 0x00022222},
356         {REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
357         {REG_A6XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
358         {REG_A6XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
359         {REG_A6XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
360         {REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
361         {REG_A6XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
362         {REG_A6XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
363         {REG_A6XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
364         {REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
365         {REG_A6XX_RBBM_CLOCK_HYST3_TP1, 0x77777777},
366         {REG_A6XX_RBBM_CLOCK_HYST3_TP2, 0x77777777},
367         {REG_A6XX_RBBM_CLOCK_HYST3_TP3, 0x77777777},
368         {REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
369         {REG_A6XX_RBBM_CLOCK_HYST4_TP1, 0x00077777},
370         {REG_A6XX_RBBM_CLOCK_HYST4_TP2, 0x00077777},
371         {REG_A6XX_RBBM_CLOCK_HYST4_TP3, 0x00077777},
372         {REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
373         {REG_A6XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
374         {REG_A6XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
375         {REG_A6XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
376         {REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
377         {REG_A6XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
378         {REG_A6XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
379         {REG_A6XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
380         {REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
381         {REG_A6XX_RBBM_CLOCK_DELAY3_TP1, 0x11111111},
382         {REG_A6XX_RBBM_CLOCK_DELAY3_TP2, 0x11111111},
383         {REG_A6XX_RBBM_CLOCK_DELAY3_TP3, 0x11111111},
384         {REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
385         {REG_A6XX_RBBM_CLOCK_DELAY4_TP1, 0x00011111},
386         {REG_A6XX_RBBM_CLOCK_DELAY4_TP2, 0x00011111},
387         {REG_A6XX_RBBM_CLOCK_DELAY4_TP3, 0x00011111},
388         {REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
389         {REG_A6XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
390         {REG_A6XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
391         {REG_A6XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
392         {REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
393         {REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
394         {REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
395         {REG_A6XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
396         {REG_A6XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
397         {REG_A6XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
398         {REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x00002222},
399         {REG_A6XX_RBBM_CLOCK_CNTL2_RB1, 0x00002222},
400         {REG_A6XX_RBBM_CLOCK_CNTL2_RB2, 0x00002222},
401         {REG_A6XX_RBBM_CLOCK_CNTL2_RB3, 0x00002222},
402         {REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
403         {REG_A6XX_RBBM_CLOCK_CNTL_CCU1, 0x00002220},
404         {REG_A6XX_RBBM_CLOCK_CNTL_CCU2, 0x00002220},
405         {REG_A6XX_RBBM_CLOCK_CNTL_CCU3, 0x00002220},
406         {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040f00},
407         {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU1, 0x00040f00},
408         {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU2, 0x00040f00},
409         {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU3, 0x00040f00},
410         {REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x05022022},
411         {REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
412         {REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
413         {REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
414         {REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
415         {REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
416         {REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
417         {REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
418         {REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
419         {REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
420         {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
421         {REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
422         {REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
423         {REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
424         {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
425         {REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
426         {REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
427         {REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
428         {REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
429         {},
430 };
431
432 const struct adreno_reglist a640_hwcg[] = {
433         {REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
434         {REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
435         {REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
436         {REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
437         {REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222},
438         {REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
439         {REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
440         {REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
441         {REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
442         {REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
443         {REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
444         {REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
445         {REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
446         {REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
447         {REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
448         {REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
449         {REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
450         {REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
451         {REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
452         {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
453         {REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x05222022},
454         {REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
455         {REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
456         {REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
457         {REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
458         {REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
459         {REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
460         {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
461         {REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
462         {REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
463         {REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
464         {REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
465         {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
466         {REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
467         {REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
468         {REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
469         {REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
470         {REG_A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222},
471         {REG_A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111},
472         {REG_A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000000},
473         {REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
474         {REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
475         {REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
476         {REG_A6XX_RBBM_ISDB_CNT, 0x00000182},
477         {REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
478         {REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000},
479         {REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
480         {REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
481         {REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
482         {},
483 };
484
485 const struct adreno_reglist a650_hwcg[] = {
486         {REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
487         {REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
488         {REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
489         {REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
490         {REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222},
491         {REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
492         {REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
493         {REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
494         {REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
495         {REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
496         {REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
497         {REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
498         {REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
499         {REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
500         {REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
501         {REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
502         {REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
503         {REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
504         {REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
505         {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
506         {REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022},
507         {REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
508         {REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
509         {REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
510         {REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
511         {REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
512         {REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
513         {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
514         {REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
515         {REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
516         {REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
517         {REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
518         {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
519         {REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
520         {REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
521         {REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
522         {REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
523         {REG_A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222},
524         {REG_A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111},
525         {REG_A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000777},
526         {REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
527         {REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
528         {REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
529         {REG_A6XX_RBBM_ISDB_CNT, 0x00000182},
530         {REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
531         {REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000},
532         {REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
533         {REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
534         {REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
535         {},
536 };
537
538 const struct adreno_reglist a660_hwcg[] = {
539         {REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
540         {REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
541         {REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
542         {REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
543         {REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
544         {REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
545         {REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
546         {REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
547         {REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
548         {REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
549         {REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
550         {REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
551         {REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
552         {REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
553         {REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
554         {REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
555         {REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
556         {REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
557         {REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
558         {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
559         {REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022},
560         {REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
561         {REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
562         {REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
563         {REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
564         {REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
565         {REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
566         {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
567         {REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
568         {REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
569         {REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
570         {REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
571         {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
572         {REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
573         {REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
574         {REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
575         {REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
576         {REG_A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222},
577         {REG_A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111},
578         {REG_A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000000},
579         {REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
580         {REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
581         {REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
582         {REG_A6XX_RBBM_ISDB_CNT, 0x00000182},
583         {REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
584         {REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000},
585         {REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
586         {REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
587         {REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
588         {},
589 };
590
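/*
 * Apply the per-target hardware clock gating (HWCG) register list and
 * program RBBM_CLOCK_CNTL to match, disabling the SP clock via the GMU
 * while the registers are being reprogrammed.
 */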
591 static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state)
592 {
593         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
594         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
595         struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
596         const struct adreno_reglist *reg;
597         unsigned int i;
598         u32 val, clock_cntl_on;
599
600         if (!adreno_gpu->info->hwcg)
601                 return;
602
603         if (adreno_is_a630(adreno_gpu))
604                 clock_cntl_on = 0x8aa8aa02;
605         else
606                 clock_cntl_on = 0x8aa8aa82;
607
608         val = gpu_read(gpu, REG_A6XX_RBBM_CLOCK_CNTL);
609
610         /* Don't re-program the registers if they are already correct */
611         if ((!state && !val) || (state && (val == clock_cntl_on)))
612                 return;
613
614         /* Disable SP clock before programming HWCG registers */
615         gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0);
616
617         for (i = 0; (reg = &adreno_gpu->info->hwcg[i], reg->offset); i++)
618                 gpu_write(gpu, reg->offset, state ? reg->value : 0);
619
620         /* Enable SP clock */
621         gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1);
622
623         gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? clock_cntl_on : 0);
624 }
625
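/*
 * Each entry below is built with the A6XX_PROTECT_RDONLY()/NORDWR() macros,
 * pairing a base register offset with the length of the span to protect;
 * the final entry of every table is later programmed as an open-ended
 * ("infinite") range by a6xx_set_cp_protect().
 */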
626 /* For a615, a616, a618, a619, a630, a640 and a680 */
627 static const u32 a6xx_protect[] = {
628         A6XX_PROTECT_RDONLY(0x00000, 0x04ff),
629         A6XX_PROTECT_RDONLY(0x00501, 0x0005),
630         A6XX_PROTECT_RDONLY(0x0050b, 0x02f4),
631         A6XX_PROTECT_NORDWR(0x0050e, 0x0000),
632         A6XX_PROTECT_NORDWR(0x00510, 0x0000),
633         A6XX_PROTECT_NORDWR(0x00534, 0x0000),
634         A6XX_PROTECT_NORDWR(0x00800, 0x0082),
635         A6XX_PROTECT_NORDWR(0x008a0, 0x0008),
636         A6XX_PROTECT_NORDWR(0x008ab, 0x0024),
637         A6XX_PROTECT_RDONLY(0x008de, 0x00ae),
638         A6XX_PROTECT_NORDWR(0x00900, 0x004d),
639         A6XX_PROTECT_NORDWR(0x0098d, 0x0272),
640         A6XX_PROTECT_NORDWR(0x00e00, 0x0001),
641         A6XX_PROTECT_NORDWR(0x00e03, 0x000c),
642         A6XX_PROTECT_NORDWR(0x03c00, 0x00c3),
643         A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff),
644         A6XX_PROTECT_NORDWR(0x08630, 0x01cf),
645         A6XX_PROTECT_NORDWR(0x08e00, 0x0000),
646         A6XX_PROTECT_NORDWR(0x08e08, 0x0000),
647         A6XX_PROTECT_NORDWR(0x08e50, 0x001f),
648         A6XX_PROTECT_NORDWR(0x09624, 0x01db),
649         A6XX_PROTECT_NORDWR(0x09e70, 0x0001),
650         A6XX_PROTECT_NORDWR(0x09e78, 0x0187),
651         A6XX_PROTECT_NORDWR(0x0a630, 0x01cf),
652         A6XX_PROTECT_NORDWR(0x0ae02, 0x0000),
653         A6XX_PROTECT_NORDWR(0x0ae50, 0x032f),
654         A6XX_PROTECT_NORDWR(0x0b604, 0x0000),
655         A6XX_PROTECT_NORDWR(0x0be02, 0x0001),
656         A6XX_PROTECT_NORDWR(0x0be20, 0x17df),
657         A6XX_PROTECT_NORDWR(0x0f000, 0x0bff),
658         A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff),
659         A6XX_PROTECT_NORDWR(0x11c00, 0x0000), /* note: infinite range */
660 };
661
662 /* These are for a620 and a650 */
663 static const u32 a650_protect[] = {
664         A6XX_PROTECT_RDONLY(0x00000, 0x04ff),
665         A6XX_PROTECT_RDONLY(0x00501, 0x0005),
666         A6XX_PROTECT_RDONLY(0x0050b, 0x02f4),
667         A6XX_PROTECT_NORDWR(0x0050e, 0x0000),
668         A6XX_PROTECT_NORDWR(0x00510, 0x0000),
669         A6XX_PROTECT_NORDWR(0x00534, 0x0000),
670         A6XX_PROTECT_NORDWR(0x00800, 0x0082),
671         A6XX_PROTECT_NORDWR(0x008a0, 0x0008),
672         A6XX_PROTECT_NORDWR(0x008ab, 0x0024),
673         A6XX_PROTECT_RDONLY(0x008de, 0x00ae),
674         A6XX_PROTECT_NORDWR(0x00900, 0x004d),
675         A6XX_PROTECT_NORDWR(0x0098d, 0x0272),
676         A6XX_PROTECT_NORDWR(0x00e00, 0x0001),
677         A6XX_PROTECT_NORDWR(0x00e03, 0x000c),
678         A6XX_PROTECT_NORDWR(0x03c00, 0x00c3),
679         A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff),
680         A6XX_PROTECT_NORDWR(0x08630, 0x01cf),
681         A6XX_PROTECT_NORDWR(0x08e00, 0x0000),
682         A6XX_PROTECT_NORDWR(0x08e08, 0x0000),
683         A6XX_PROTECT_NORDWR(0x08e50, 0x001f),
684         A6XX_PROTECT_NORDWR(0x08e80, 0x027f),
685         A6XX_PROTECT_NORDWR(0x09624, 0x01db),
686         A6XX_PROTECT_NORDWR(0x09e60, 0x0011),
687         A6XX_PROTECT_NORDWR(0x09e78, 0x0187),
688         A6XX_PROTECT_NORDWR(0x0a630, 0x01cf),
689         A6XX_PROTECT_NORDWR(0x0ae02, 0x0000),
690         A6XX_PROTECT_NORDWR(0x0ae50, 0x032f),
691         A6XX_PROTECT_NORDWR(0x0b604, 0x0000),
692         A6XX_PROTECT_NORDWR(0x0b608, 0x0007),
693         A6XX_PROTECT_NORDWR(0x0be02, 0x0001),
694         A6XX_PROTECT_NORDWR(0x0be20, 0x17df),
695         A6XX_PROTECT_NORDWR(0x0f000, 0x0bff),
696         A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff),
697         A6XX_PROTECT_NORDWR(0x18400, 0x1fff),
698         A6XX_PROTECT_NORDWR(0x1a800, 0x1fff),
699         A6XX_PROTECT_NORDWR(0x1f400, 0x0443),
700         A6XX_PROTECT_RDONLY(0x1f844, 0x007b),
701         A6XX_PROTECT_NORDWR(0x1f887, 0x001b),
702         A6XX_PROTECT_NORDWR(0x1f8c0, 0x0000), /* note: infinite range */
703 };
704
705 /* These are for a635 and a660 */
706 static const u32 a660_protect[] = {
707         A6XX_PROTECT_RDONLY(0x00000, 0x04ff),
708         A6XX_PROTECT_RDONLY(0x00501, 0x0005),
709         A6XX_PROTECT_RDONLY(0x0050b, 0x02f4),
710         A6XX_PROTECT_NORDWR(0x0050e, 0x0000),
711         A6XX_PROTECT_NORDWR(0x00510, 0x0000),
712         A6XX_PROTECT_NORDWR(0x00534, 0x0000),
713         A6XX_PROTECT_NORDWR(0x00800, 0x0082),
714         A6XX_PROTECT_NORDWR(0x008a0, 0x0008),
715         A6XX_PROTECT_NORDWR(0x008ab, 0x0024),
716         A6XX_PROTECT_RDONLY(0x008de, 0x00ae),
717         A6XX_PROTECT_NORDWR(0x00900, 0x004d),
718         A6XX_PROTECT_NORDWR(0x0098d, 0x0272),
719         A6XX_PROTECT_NORDWR(0x00e00, 0x0001),
720         A6XX_PROTECT_NORDWR(0x00e03, 0x000c),
721         A6XX_PROTECT_NORDWR(0x03c00, 0x00c3),
722         A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff),
723         A6XX_PROTECT_NORDWR(0x08630, 0x01cf),
724         A6XX_PROTECT_NORDWR(0x08e00, 0x0000),
725         A6XX_PROTECT_NORDWR(0x08e08, 0x0000),
726         A6XX_PROTECT_NORDWR(0x08e50, 0x001f),
727         A6XX_PROTECT_NORDWR(0x08e80, 0x027f),
728         A6XX_PROTECT_NORDWR(0x09624, 0x01db),
729         A6XX_PROTECT_NORDWR(0x09e60, 0x0011),
730         A6XX_PROTECT_NORDWR(0x09e78, 0x0187),
731         A6XX_PROTECT_NORDWR(0x0a630, 0x01cf),
732         A6XX_PROTECT_NORDWR(0x0ae02, 0x0000),
733         A6XX_PROTECT_NORDWR(0x0ae50, 0x012f),
734         A6XX_PROTECT_NORDWR(0x0b604, 0x0000),
735         A6XX_PROTECT_NORDWR(0x0b608, 0x0006),
736         A6XX_PROTECT_NORDWR(0x0be02, 0x0001),
737         A6XX_PROTECT_NORDWR(0x0be20, 0x015f),
738         A6XX_PROTECT_NORDWR(0x0d000, 0x05ff),
739         A6XX_PROTECT_NORDWR(0x0f000, 0x0bff),
740         A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff),
741         A6XX_PROTECT_NORDWR(0x18400, 0x1fff),
742         A6XX_PROTECT_NORDWR(0x1a400, 0x1fff),
743         A6XX_PROTECT_NORDWR(0x1f400, 0x0443),
744         A6XX_PROTECT_RDONLY(0x1f844, 0x007b),
745         A6XX_PROTECT_NORDWR(0x1f860, 0x0000),
746         A6XX_PROTECT_NORDWR(0x1f887, 0x001b),
747         A6XX_PROTECT_NORDWR(0x1f8c0, 0x0000), /* note: infinite range */
748 };
749
750 static void a6xx_set_cp_protect(struct msm_gpu *gpu)
751 {
752         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
753         const u32 *regs = a6xx_protect;
754         unsigned i, count, count_max;
755
756         if (adreno_is_a650(adreno_gpu)) {
757                 regs = a650_protect;
758                 count = ARRAY_SIZE(a650_protect);
759                 count_max = 48;
760                 BUILD_BUG_ON(ARRAY_SIZE(a650_protect) > 48);
761         } else if (adreno_is_a660_family(adreno_gpu)) {
762                 regs = a660_protect;
763                 count = ARRAY_SIZE(a660_protect);
764                 count_max = 48;
765                 BUILD_BUG_ON(ARRAY_SIZE(a660_protect) > 48);
766         } else {
767                 regs = a6xx_protect;
768                 count = ARRAY_SIZE(a6xx_protect);
769                 count_max = 32;
770                 BUILD_BUG_ON(ARRAY_SIZE(a6xx_protect) > 32);
771         }
772
773         /*
774          * Enable access protection to privileged registers, fault on an access
775          * protect violation and select the last span to protect from the start
776          * address all the way to the end of the register address space
777          */
778         gpu_write(gpu, REG_A6XX_CP_PROTECT_CNTL, BIT(0) | BIT(1) | BIT(3));
779
780         for (i = 0; i < count - 1; i++)
781                 gpu_write(gpu, REG_A6XX_CP_PROTECT(i), regs[i]);
782         /* Program the final "infinite" length entry into the last CP_PROTECT register */
783         gpu_write(gpu, REG_A6XX_CP_PROTECT(count_max - 1), regs[i]);
784 }
785
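/*
 * Program the UBWC (bandwidth compression) mode registers in the RB, TPL1,
 * SP and UCHE blocks with per-target values; a618 keeps the hardware
 * defaults.
 */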
786 static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
787 {
788         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
789         u32 lower_bit = 2;
790         u32 amsbc = 0;
791         u32 rgb565_predicator = 0;
792         u32 uavflagprd_inv = 0;
793
794         /* a618 is using the hw default values */
795         if (adreno_is_a618(adreno_gpu))
796                 return;
797
798         if (adreno_is_a640_family(adreno_gpu))
799                 amsbc = 1;
800
801         if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu)) {
802                 /* TODO: get ddr type from bootloader and use 2 for LPDDR4 */
803                 lower_bit = 3;
804                 amsbc = 1;
805                 rgb565_predicator = 1;
806                 uavflagprd_inv = 2;
807         }
808
809         if (adreno_is_7c3(adreno_gpu)) {
810                 lower_bit = 1;
811                 amsbc = 1;
812                 rgb565_predicator = 1;
813                 uavflagprd_inv = 2;
814         }
815
816         gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL,
817                 rgb565_predicator << 11 | amsbc << 4 | lower_bit << 1);
818         gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, lower_bit << 1);
819         gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
820                 uavflagprd_inv << 4 | lower_bit << 1);
821         gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, lower_bit << 21);
822 }
823
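/*
 * Issue the one-time CP_ME_INIT sequence and wait for the GPU to idle;
 * a failure here generally means the SQE firmware never started
 * processing the ringbuffer.
 */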
824 static int a6xx_cp_init(struct msm_gpu *gpu)
825 {
826         struct msm_ringbuffer *ring = gpu->rb[0];
827
828         OUT_PKT7(ring, CP_ME_INIT, 8);
829
830         OUT_RING(ring, 0x0000002f);
831
832         /* Enable multiple hardware contexts */
833         OUT_RING(ring, 0x00000003);
834
835         /* Enable error detection */
836         OUT_RING(ring, 0x20000000);
837
838         /* Don't enable header dump */
839         OUT_RING(ring, 0x00000000);
840         OUT_RING(ring, 0x00000000);
841
842         /* No workarounds enabled */
843         OUT_RING(ring, 0x00000000);
844
845         /* Pad rest of the cmds with 0's */
846         OUT_RING(ring, 0x00000000);
847         OUT_RING(ring, 0x00000000);
848
849         a6xx_flush(gpu, ring);
850         return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
851 }
852
853 /*
854  * Check that the microcode version is new enough to include several key
855  * security fixes. Return true if the ucode is safe.
856  */
857 static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu,
858                 struct drm_gem_object *obj)
859 {
860         struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
861         struct msm_gpu *gpu = &adreno_gpu->base;
862         const char *sqe_name = adreno_gpu->info->fw[ADRENO_FW_SQE];
863         u32 *buf = msm_gem_get_vaddr(obj);
864         bool ret = false;
865
866         if (IS_ERR(buf))
867                 return false;
868
869         /*
870          * Targets up to a640 (a618, a630 and a640) need to check for a
871          * microcode version that is patched to support the whereami opcode or
872          * one that is new enough to include it by default.
873          *
874          * a650 tier targets don't need whereami but still need to be
875          * equal to or newer than 0.95 for other security fixes
876          *
877          * a660 targets have all the critical security fixes from the start
878          */
879         if (!strcmp(sqe_name, "a630_sqe.fw")) {
880                 /*
881                  * If the lowest nibble is 0xa that is an indication that this
882                  * microcode has been patched. The actual version is in dword
883                  * [3] but we only care about the patchlevel which is the lowest
884                  * nibble of dword [3]
885                  *
886                  * Otherwise check that the firmware is greater than or equal
887                  * to 1.90 which was the first version that had this fix built
888                  * in
889                  */
890                 if ((((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1) ||
891                         (buf[0] & 0xfff) >= 0x190) {
892                         a6xx_gpu->has_whereami = true;
893                         ret = true;
894                         goto out;
895                 }
896
897                 DRM_DEV_ERROR(&gpu->pdev->dev,
898                         "a630 SQE ucode is too old. Have version %x need at least %x\n",
899                         buf[0] & 0xfff, 0x190);
900         } else if (!strcmp(sqe_name, "a650_sqe.fw")) {
901                 if ((buf[0] & 0xfff) >= 0x095) {
902                         ret = true;
903                         goto out;
904                 }
905
906                 DRM_DEV_ERROR(&gpu->pdev->dev,
907                         "a650 SQE ucode is too old. Have version %x need at least %x\n",
908                         buf[0] & 0xfff, 0x095);
909         } else if (!strcmp(sqe_name, "a660_sqe.fw")) {
910                 ret = true;
911         } else {
912                 DRM_DEV_ERROR(&gpu->pdev->dev,
913                         "unknown GPU, add it to a6xx_ucode_check_version()!!\n");
914         }
915 out:
916         msm_gem_put_vaddr(obj);
917         return ret;
918 }
919
920 static int a6xx_ucode_init(struct msm_gpu *gpu)
921 {
922         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
923         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
924
925         if (!a6xx_gpu->sqe_bo) {
926                 a6xx_gpu->sqe_bo = adreno_fw_create_bo(gpu,
927                         adreno_gpu->fw[ADRENO_FW_SQE], &a6xx_gpu->sqe_iova);
928
929                 if (IS_ERR(a6xx_gpu->sqe_bo)) {
930                         int ret = PTR_ERR(a6xx_gpu->sqe_bo);
931
932                         a6xx_gpu->sqe_bo = NULL;
933                         DRM_DEV_ERROR(&gpu->pdev->dev,
934                                 "Could not allocate SQE ucode: %d\n", ret);
935
936                         return ret;
937                 }
938
939                 msm_gem_object_set_name(a6xx_gpu->sqe_bo, "sqefw");
940                 if (!a6xx_ucode_check_version(a6xx_gpu, a6xx_gpu->sqe_bo)) {
941                         msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->aspace);
942                         drm_gem_object_put(a6xx_gpu->sqe_bo);
943
944                         a6xx_gpu->sqe_bo = NULL;
945                         return -EPERM;
946                 }
947         }
948
949         gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);
950
951         return 0;
952 }
953
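/*
 * Load the zap shader into the secure world (via the GPU PAS id); the
 * static 'loaded' flag avoids reloading it once it has succeeded.
 */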
954 static int a6xx_zap_shader_init(struct msm_gpu *gpu)
955 {
956         static bool loaded;
957         int ret;
958
959         if (loaded)
960                 return 0;
961
962         ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
963
964         loaded = !ret;
965         return ret;
966 }
967
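/* RBBM interrupt sources enabled while the GPU is running */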
968 #define A6XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
969           A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
970           A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
971           A6XX_RBBM_INT_0_MASK_CP_IB2 | \
972           A6XX_RBBM_INT_0_MASK_CP_IB1 | \
973           A6XX_RBBM_INT_0_MASK_CP_RB | \
974           A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
975           A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
976           A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
977           A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
978           A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR)
979
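/*
 * Per-power-up hardware setup, called with the GMU lock held (see
 * a6xx_hw_init()); the GMU OOB vote below keeps the GPU powered for the
 * duration of the sequence.
 */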
980 static int hw_init(struct msm_gpu *gpu)
981 {
982         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
983         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
984         int ret;
985
986         /* Make sure the GMU keeps the GPU on while we set it up */
987         a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
988
989         /* Clear GBIF halt in case GX domain was not collapsed */
990         if (a6xx_has_gbif(adreno_gpu))
991                 gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 0);
992
993         gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);
994
995         /*
996          * Disable the trusted memory range - we don't actually support secure
997          * memory rendering at this point in time and we don't want to block off
998          * part of the virtual memory space.
999          */
1000         gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 0x00000000);
1001         gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
1002
1003         /* Turn on 64 bit addressing for all blocks */
1004         gpu_write(gpu, REG_A6XX_CP_ADDR_MODE_CNTL, 0x1);
1005         gpu_write(gpu, REG_A6XX_VSC_ADDR_MODE_CNTL, 0x1);
1006         gpu_write(gpu, REG_A6XX_GRAS_ADDR_MODE_CNTL, 0x1);
1007         gpu_write(gpu, REG_A6XX_RB_ADDR_MODE_CNTL, 0x1);
1008         gpu_write(gpu, REG_A6XX_PC_ADDR_MODE_CNTL, 0x1);
1009         gpu_write(gpu, REG_A6XX_HLSQ_ADDR_MODE_CNTL, 0x1);
1010         gpu_write(gpu, REG_A6XX_VFD_ADDR_MODE_CNTL, 0x1);
1011         gpu_write(gpu, REG_A6XX_VPC_ADDR_MODE_CNTL, 0x1);
1012         gpu_write(gpu, REG_A6XX_UCHE_ADDR_MODE_CNTL, 0x1);
1013         gpu_write(gpu, REG_A6XX_SP_ADDR_MODE_CNTL, 0x1);
1014         gpu_write(gpu, REG_A6XX_TPL1_ADDR_MODE_CNTL, 0x1);
1015         gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
1016
1017         /* enable hardware clockgating */
1018         a6xx_set_hwcg(gpu, true);
1019
1020         /* VBIF/GBIF start */
1021         if (adreno_is_a640_family(adreno_gpu) ||
1022             adreno_is_a650_family(adreno_gpu)) {
1023                 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE0, 0x00071620);
1024                 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE1, 0x00071620);
1025                 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE2, 0x00071620);
1026                 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE3, 0x00071620);
1028                 gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x3);
1029         } else {
1030                 gpu_write(gpu, REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3);
1031         }
1032
1033         if (adreno_is_a630(adreno_gpu))
1034                 gpu_write(gpu, REG_A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
1035
1036         /* Make all blocks contribute to the GPU BUSY perf counter */
1037         gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff);
1038
1039         /* Disable L2 bypass in the UCHE */
1040         gpu_write(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX_LO, 0xffffffc0);
1041         gpu_write(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX_HI, 0x0001ffff);
1042         gpu_write(gpu, REG_A6XX_UCHE_TRAP_BASE_LO, 0xfffff000);
1043         gpu_write(gpu, REG_A6XX_UCHE_TRAP_BASE_HI, 0x0001ffff);
1044         gpu_write(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE_LO, 0xfffff000);
1045         gpu_write(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE_HI, 0x0001ffff);
1046
1047         if (!adreno_is_a650_family(adreno_gpu)) {
1048                 /* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */
1049                 gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
1050
1051                 gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX_LO,
1052                         0x00100000 + adreno_gpu->gmem - 1);
1053         }
1054
1055         gpu_write(gpu, REG_A6XX_UCHE_FILTER_CNTL, 0x804);
1056         gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, 0x4);
1057
1058         if (adreno_is_a640_family(adreno_gpu) ||
1059             adreno_is_a650_family(adreno_gpu))
1060                 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140);
1061         else
1062                 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x010000c0);
1063         gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);
1064
1065         if (adreno_is_a660_family(adreno_gpu))
1066                 gpu_write(gpu, REG_A6XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020);
1067
1068         /* Setting the mem pool size */
1069         gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 128);
1070
1071         /* Set the primFifo thresholds to the default values,
1072          * and the vccCacheSkipDis=1 bit (0x200) for A640 and newer
1073          */
1074         if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu))
1075                 gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00300200);
1076         else if (adreno_is_a640_family(adreno_gpu) || adreno_is_7c3(adreno_gpu))
1077                 gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00200200);
1080         else
1081                 gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00180000);
1082
1083         /* Set the AHB default slave response to "ERROR" */
1084         gpu_write(gpu, REG_A6XX_CP_AHB_CNTL, 0x1);
1085
1086         /* Turn on performance counters */
1087         gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_CNTL, 0x1);
1088
1089         /* Select CP0 to always count cycles */
1090         gpu_write(gpu, REG_A6XX_CP_PERFCTR_CP_SEL(0), PERF_CP_ALWAYS_COUNT);
1091
1092         a6xx_set_ubwc_config(gpu);
1093
1094         /* Enable fault detection */
1095         gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL,
1096                 (1 << 30) | 0x1fffff);
1097
1098         gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, 1);
1099
1100         /* Set weights for bicubic filtering */
1101         if (adreno_is_a650_family(adreno_gpu)) {
1102                 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, 0);
1103                 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1,
1104                         0x3fe05ff4);
1105                 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2,
1106                         0x3fa0ebee);
1107                 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3,
1108                         0x3f5193ed);
1109                 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4,
1110                         0x3f0243f0);
1111         }
1112
1113         /* Protect registers from the CP */
1114         a6xx_set_cp_protect(gpu);
1115
1116         if (adreno_is_a660_family(adreno_gpu)) {
1117                 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1);
1118                 gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0);
1119         }
1120
1121         /* Set dualQ + disable afull for A660 GPU */
1122         if (adreno_is_a660(adreno_gpu))
1123                 gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906);
1124
1125         /* Enable expanded apriv for targets that support it */
1126         if (gpu->hw_apriv) {
1127                 gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL,
1128                         (1 << 6) | (1 << 5) | (1 << 3) | (1 << 2) | (1 << 1));
1129         }
1130
1131         /* Enable interrupts */
1132         gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK, A6XX_INT_MASK);
1133
1134         ret = adreno_hw_init(gpu);
1135         if (ret)
1136                 goto out;
1137
1138         ret = a6xx_ucode_init(gpu);
1139         if (ret)
1140                 goto out;
1141
1142         /* Set the ringbuffer address */
1143         gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);
1144
1145         /* Targets that support extended APRIV can use the RPTR shadow from
1146          * hardware but all the other ones need to disable the feature. Targets
1147          * that support the WHERE_AM_I opcode can use that instead
1148          */
1149         if (adreno_gpu->base.hw_apriv)
1150                 gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT);
1151         else
1152                 gpu_write(gpu, REG_A6XX_CP_RB_CNTL,
1153                         MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
1154
1155         /*
1156          * Expanded APRIV and targets that support WHERE_AM_I both need a
1157          * privileged buffer to store the RPTR shadow
1158          */
1159
1160         if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) {
1161                 if (!a6xx_gpu->shadow_bo) {
1162                         a6xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
1163                                 sizeof(u32) * gpu->nr_rings,
1164                                 MSM_BO_WC | MSM_BO_MAP_PRIV,
1165                                 gpu->aspace, &a6xx_gpu->shadow_bo,
1166                                 &a6xx_gpu->shadow_iova);
1167
1168                         if (IS_ERR(a6xx_gpu->shadow))
1169                                 return PTR_ERR(a6xx_gpu->shadow);
1170
1171                         msm_gem_object_set_name(a6xx_gpu->shadow_bo, "shadow");
1172                 }
1173
1174                 gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR_LO,
1175                         shadowptr(a6xx_gpu, gpu->rb[0]));
1176         }
1177
1178         /* Always come up on rb 0 */
1179         a6xx_gpu->cur_ring = gpu->rb[0];
1180
1181         gpu->cur_ctx_seqno = 0;
1182
1183         /* Enable the SQE to start the CP engine */
1184         gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1);
1185
1186         ret = a6xx_cp_init(gpu);
1187         if (ret)
1188                 goto out;
1189
1190         /*
1191          * Try to load a zap shader into the secure world. If successful
1192          * we can use the CP to switch out of secure mode. If not then we
1193          * have no recourse but to try to switch ourselves out manually. If we
1194          * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
1195          * be blocked and a permissions violation will soon follow.
1196          */
1197         ret = a6xx_zap_shader_init(gpu);
1198         if (!ret) {
1199                 OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
1200                 OUT_RING(gpu->rb[0], 0x00000000);
1201
1202                 a6xx_flush(gpu, gpu->rb[0]);
1203                 if (!a6xx_idle(gpu, gpu->rb[0]))
1204                         return -EINVAL;
1205         } else if (ret == -ENODEV) {
1206                 /*
1207                  * This device does not use zap shader (but print a warning
1208                  * just in case someone got their dt wrong.. hopefully they
1209                  * have a debug UART to realize the error of their ways...
1210                  * if you mess this up you are about to crash horribly)
1211                  */
1212                 dev_warn_once(gpu->dev->dev,
1213                         "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
1214                 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
1215                 ret = 0;
1216         } else {
1217                 return ret;
1218         }
1219
1220 out:
1221         /*
1222          * Tell the GMU that we are done touching the GPU and it can start power
1223          * management
1224          */
1225         a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
1226
1227         if (a6xx_gpu->gmu.legacy) {
1228                 /* Take the GMU out of its special boot mode */
1229                 a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_BOOT_SLUMBER);
1230         }
1231
1232         return ret;
1233 }
1234
1235 static int a6xx_hw_init(struct msm_gpu *gpu)
1236 {
1237         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1238         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1239         int ret;
1240
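             /*
              * hw_init() sets and clears GMU OOB states, so hold the GMU lock
              * to serialize against other GMU operations.
              */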
1241         mutex_lock(&a6xx_gpu->gmu.lock);
1242         ret = hw_init(gpu);
1243         mutex_unlock(&a6xx_gpu->gmu.lock);
1244
1245         return ret;
1246 }
1247
1248 static void a6xx_dump(struct msm_gpu *gpu)
1249 {
1250         DRM_DEV_INFO(&gpu->pdev->dev, "status:   %08x\n",
1251                         gpu_read(gpu, REG_A6XX_RBBM_STATUS));
1252         adreno_dump(gpu);
1253 }
1254
1255 #define VBIF_RESET_ACK_TIMEOUT  100
1256 #define VBIF_RESET_ACK_MASK     0x00f0
1257
1258 static void a6xx_recover(struct msm_gpu *gpu)
1259 {
1260         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1261         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1262         int i, active_submits;
1263
1264         adreno_dump_info(gpu);
1265
1266         for (i = 0; i < 8; i++)
1267                 DRM_DEV_INFO(&gpu->pdev->dev, "CP_SCRATCH_REG%d: %u\n", i,
1268                         gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(i)));
1269
1270         if (hang_debug)
1271                 a6xx_dump(gpu);
1272
1273         /* Halt SQE first */
1274         gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3);
1275
1276         /*
1277          * Turn off keep alive that might have been enabled by the hang
1278          * interrupt
1279          */
1280         gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 0);
1281
1282         pm_runtime_dont_use_autosuspend(&gpu->pdev->dev);
1283
1284         /* active_submits won't change until we make a submission */
1285         mutex_lock(&gpu->active_lock);
1286         active_submits = gpu->active_submits;
1287
1288         /*
1289          * Temporarily clear active_submits count to silence a WARN() in the
1290          * runtime suspend cb
1291          */
1292         gpu->active_submits = 0;
1293
1294         /* Drop the rpm refcount from active submits */
1295         if (active_submits)
1296                 pm_runtime_put(&gpu->pdev->dev);
1297
1298         /* And the final one from recover worker */
1299         pm_runtime_put_sync(&gpu->pdev->dev);
1300
1301         /* Call into gpucc driver to poll for cx gdsc collapse */
1302         reset_control_reset(gpu->cx_collapse);
1303
1304         pm_runtime_use_autosuspend(&gpu->pdev->dev);
1305
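             /* Take back the rpm references dropped above */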
1306         if (active_submits)
1307                 pm_runtime_get(&gpu->pdev->dev);
1308
1309         pm_runtime_get_sync(&gpu->pdev->dev);
1310
1311         gpu->active_submits = active_submits;
1312         mutex_unlock(&gpu->active_lock);
1313
1314         msm_gpu_hw_init(gpu);
1315 }
1316
1317 static const char *a6xx_uche_fault_block(struct msm_gpu *gpu, u32 mid)
1318 {
1319         static const char *uche_clients[7] = {
1320                 "VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ",
1321         };
1322         u32 val;
1323
1324         if (mid < 1 || mid > 3)
1325                 return "UNKNOWN";
1326
1327         /*
1328          * The source of the data depends on the mid ID read from FSYNR1
1329          * and the client ID read from the UCHE block
1330          */
1331         val = gpu_read(gpu, REG_A6XX_UCHE_CLIENT_PF);
1332
1333         /* mid = 3 is most precise and refers to only one block per client */
1334         if (mid == 3)
1335                 return uche_clients[val & 7];
1336
1337         /* For mid=2 the source is TP or VFD except when the client id is 0 */
1338         if (mid == 2)
1339                 return ((val & 7) == 0) ? "TP" : "TP|VFD";
1340
1341         /* For mid=1 just return "UCHE" as a catchall for everything else */
1342         return "UCHE";
1343 }
1344
1345 static const char *a6xx_fault_block(struct msm_gpu *gpu, u32 id)
1346 {
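             /* A few IDs map to fixed blocks; the rest are decoded via the UCHE */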
1347         if (id == 0)
1348                 return "CP";
1349         else if (id == 4)
1350                 return "CCU";
1351         else if (id == 6)
1352                 return "CDP Prefetch";
1353
1354         return a6xx_uche_fault_block(gpu, id);
1355 }
1356
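     /* Fault status bits reported in adreno_smmu_fault_info::fsr by the SMMU */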
1357 #define ARM_SMMU_FSR_TF                 BIT(1)
1358 #define ARM_SMMU_FSR_PF                 BIT(3)
1359 #define ARM_SMMU_FSR_EF                 BIT(4)
1360
1361 static int a6xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
1362 {
1363         struct msm_gpu *gpu = arg;
1364         struct adreno_smmu_fault_info *info = data;
1365         const char *type = "UNKNOWN";
1366         const char *block;
1367         bool do_devcoredump = info && !READ_ONCE(gpu->crashstate);
1368
1369         /*
1370          * If we aren't going to be resuming later from fault_worker, then do
1371          * it now.
1372          */
1373         if (!do_devcoredump) {
1374                 gpu->aspace->mmu->funcs->resume_translation(gpu->aspace->mmu);
1375         }
1376
1377         /*
1378          * Print a default message if we couldn't get the data from the
1379          * adreno-smmu-priv
1380          */
1381         if (!info) {
1382                 pr_warn_ratelimited("*** gpu fault: iova=%.16lx flags=%d (%u,%u,%u,%u)\n",
1383                         iova, flags,
1384                         gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)),
1385                         gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)),
1386                         gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)),
1387                         gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7)));
1388
1389                 return 0;
1390         }
1391
1392         if (info->fsr & ARM_SMMU_FSR_TF)
1393                 type = "TRANSLATION";
1394         else if (info->fsr & ARM_SMMU_FSR_PF)
1395                 type = "PERMISSION";
1396         else if (info->fsr & ARM_SMMU_FSR_EF)
1397                 type = "EXTERNAL";
1398
1399         block = a6xx_fault_block(gpu, info->fsynr1 & 0xff);
1400
1401         pr_warn_ratelimited("*** gpu fault: ttbr0=%.16llx iova=%.16lx dir=%s type=%s source=%s (%u,%u,%u,%u)\n",
1402                         info->ttbr0, iova,
1403                         flags & IOMMU_FAULT_WRITE ? "WRITE" : "READ",
1404                         type, block,
1405                         gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)),
1406                         gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)),
1407                         gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)),
1408                         gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7)));
1409
1410         if (do_devcoredump) {
1411                 /* Turn off the hangcheck timer to keep it from bothering us */
1412                 del_timer(&gpu->hangcheck_timer);
1413
1414                 gpu->fault_info.ttbr0 = info->ttbr0;
1415                 gpu->fault_info.iova  = iova;
1416                 gpu->fault_info.flags = flags;
1417                 gpu->fault_info.type  = type;
1418                 gpu->fault_info.block = block;
1419
1420                 kthread_queue_work(gpu->worker, &gpu->fault_work);
1421         }
1422
1423         return 0;
1424 }
1425
1426 static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu)
1427 {
1428         u32 status = gpu_read(gpu, REG_A6XX_CP_INTERRUPT_STATUS);
1429
1430         if (status & A6XX_CP_INT_CP_OPCODE_ERROR) {
1431                 u32 val;
1432
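                     /* Read back the (possible) offending opcode from the SQE stats */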
1433                 gpu_write(gpu, REG_A6XX_CP_SQE_STAT_ADDR, 1);
1434                 val = gpu_read(gpu, REG_A6XX_CP_SQE_STAT_DATA);
1435                 dev_err_ratelimited(&gpu->pdev->dev,
1436                         "CP | opcode error | possible opcode=0x%8.8X\n",
1437                         val);
1438         }
1439
1440         if (status & A6XX_CP_INT_CP_UCODE_ERROR)
1441                 dev_err_ratelimited(&gpu->pdev->dev,
1442                         "CP ucode error interrupt\n");
1443
1444         if (status & A6XX_CP_INT_CP_HW_FAULT_ERROR)
1445                 dev_err_ratelimited(&gpu->pdev->dev, "CP | HW fault | status=0x%8.8X\n",
1446                         gpu_read(gpu, REG_A6XX_CP_HW_FAULT));
1447
1448         if (status & A6XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
1449                 u32 val = gpu_read(gpu, REG_A6XX_CP_PROTECT_STATUS);
1450
1451                 dev_err_ratelimited(&gpu->pdev->dev,
1452                         "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
1453                         val & (1 << 20) ? "READ" : "WRITE",
1454                         (val & 0x3ffff), val);
1455         }
1456
1457         if (status & A6XX_CP_INT_CP_AHB_ERROR)
1458                 dev_err_ratelimited(&gpu->pdev->dev, "CP AHB error interrupt\n");
1459
1460         if (status & A6XX_CP_INT_CP_VSD_PARITY_ERROR)
1461                 dev_err_ratelimited(&gpu->pdev->dev, "CP VSD decoder parity error\n");
1462
1463         if (status & A6XX_CP_INT_CP_ILLEGAL_INSTR_ERROR)
1464                 dev_err_ratelimited(&gpu->pdev->dev, "CP illegal instruction error\n");
1465
1466 }
1467
1468 static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
1469 {
1470         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1471         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1472         struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
1473
1474         /*
1475          * If stalled on SMMU fault, we could trip the GPU's hang detection,
1476          * but the fault handler will trigger the devcore dump, and we want
1477          * to otherwise resume normally rather than killing the submit, so
1478          * just bail.
1479          */
1480         if (gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT)
1481                 return;
1482
1483         /*
1484          * Force the GPU to stay on until after we finish
1485          * collecting information
1486          */
1487         gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1);
1488
1489         DRM_DEV_ERROR(&gpu->pdev->dev,
1490                 "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
1491                 ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
1492                 gpu_read(gpu, REG_A6XX_RBBM_STATUS),
1493                 gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
1494                 gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
1495                 gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
1496                 gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
1497                 gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
1498                 gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE));
1499
1500         /* Turn off the hangcheck timer to keep it from bothering us */
1501         del_timer(&gpu->hangcheck_timer);
1502
1503         kthread_queue_work(gpu->worker, &gpu->recover_work);
1504 }
1505
1506 static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
1507 {
1508         struct msm_drm_private *priv = gpu->dev->dev_private;
1509         u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS);
1510
1511         gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status);
1512
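             /*
              * With error IRQs disabled, only the CACHE_FLUSH_TS (retire)
              * interrupt is handled.
              */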
1513         if (priv->disable_err_irq)
1514                 status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS;
1515
1516         if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT)
1517                 a6xx_fault_detect_irq(gpu);
1518
1519         if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR)
1520                 dev_err_ratelimited(&gpu->pdev->dev, "CP | AHB bus error\n");
1521
1522         if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1523                 a6xx_cp_hw_err_irq(gpu);
1524
1525         if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW)
1526                 dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n");
1527
1528         if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
1529                 dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n");
1530
1531         if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1532                 dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n");
1533
1534         if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS)
1535                 msm_gpu_retire(gpu);
1536
1537         return IRQ_HANDLED;
1538 }
1539
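     /* The LLC registers are indexed by dword offset, hence the << 2 */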
1540 static void a6xx_llc_rmw(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 mask, u32 or)
1541 {
1542         return msm_rmw(a6xx_gpu->llc_mmio + (reg << 2), mask, or);
1543 }
1544
1545 static void a6xx_llc_write(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 value)
1546 {
1547         msm_writel(value, a6xx_gpu->llc_mmio + (reg << 2));
1548 }
1549
1550 static void a6xx_llc_deactivate(struct a6xx_gpu *a6xx_gpu)
1551 {
1552         llcc_slice_deactivate(a6xx_gpu->llc_slice);
1553         llcc_slice_deactivate(a6xx_gpu->htw_llc_slice);
1554 }
1555
1556 static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
1557 {
1558         struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
1559         struct msm_gpu *gpu = &adreno_gpu->base;
1560         u32 cntl1_regval = 0;
1561
1562         if (IS_ERR(a6xx_gpu->llc_mmio))
1563                 return;
1564
1565         if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
1566                 u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
1567
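                     /* Replicate the 5-bit SCID into each client field of CNTL1 */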
1568                 gpu_scid &= 0x1f;
1569                 cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) | (gpu_scid << 10) |
1570                                (gpu_scid << 15) | (gpu_scid << 20);
1571
1572                 /* On A660, the SCID programming for UCHE traffic is done in
1573                  * A6XX_GBIF_SCACHE_CNTL0[14:10]
1574                  */
1575                 if (adreno_is_a660_family(adreno_gpu))
1576                         gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, (0x1f << 10) |
1577                                 (1 << 8), (gpu_scid << 10) | (1 << 8));
1578         }
1579
1580         /*
1581          * For targets with an MMU500, activate the slice but don't program the
1582          * register.  The XBL will take care of that.
1583          */
1584         if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) {
1585                 if (!a6xx_gpu->have_mmu500) {
1586                         u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice);
1587
1588                         gpuhtw_scid &= 0x1f;
1589                         cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid);
1590                 }
1591         }
1592
1593         if (!cntl1_regval)
1594                 return;
1595
1596         /*
1597          * Program the slice IDs for the various GPU blocks and GPU MMU
1598          * pagetables
1599          */
1600         if (!a6xx_gpu->have_mmu500) {
1601                 a6xx_llc_write(a6xx_gpu,
1602                         REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval);
1603
1604                 /*
1605                  * Program cacheability overrides to not allocate cache
1606                  * lines on a write miss
1607                  */
1608                 a6xx_llc_rmw(a6xx_gpu,
1609                         REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03);
1610                 return;
1611         }
1612
1613         gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0), cntl1_regval);
1614 }
1615
1616 static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu)
1617 {
1618         llcc_slice_putd(a6xx_gpu->llc_slice);
1619         llcc_slice_putd(a6xx_gpu->htw_llc_slice);
1620 }
1621
1622 static void a6xx_llc_slices_init(struct platform_device *pdev,
1623                 struct a6xx_gpu *a6xx_gpu)
1624 {
1625         struct device_node *phandle;
1626
1627         /*
1628          * There is a different programming path for targets with an mmu500
1629          * attached, so detect if that is the case
1630          */
1631         phandle = of_parse_phandle(pdev->dev.of_node, "iommus", 0);
1632         a6xx_gpu->have_mmu500 = (phandle &&
1633                 of_device_is_compatible(phandle, "arm,mmu-500"));
1634         of_node_put(phandle);
1635
1636         if (a6xx_gpu->have_mmu500)
1637                 a6xx_gpu->llc_mmio = NULL;
1638         else
1639                 a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem");
1640
1641         a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU);
1642         a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);
1643
1644         if (IS_ERR_OR_NULL(a6xx_gpu->llc_slice) && IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice))
1645                 a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL);
1646 }
1647
1648 static int a6xx_pm_resume(struct msm_gpu *gpu)
1649 {
1650         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1651         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1652         int ret;
1653
1654         gpu->needs_hw_init = true;
1655
1656         trace_msm_gpu_resume(0);
1657
1658         mutex_lock(&a6xx_gpu->gmu.lock);
1659         ret = a6xx_gmu_resume(a6xx_gpu);
1660         mutex_unlock(&a6xx_gpu->gmu.lock);
1661         if (ret)
1662                 return ret;
1663
1664         msm_devfreq_resume(gpu);
1665
1666         a6xx_llc_activate(a6xx_gpu);
1667
1668         return 0;
1669 }
1670
1671 static int a6xx_pm_suspend(struct msm_gpu *gpu)
1672 {
1673         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1674         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1675         int i, ret;
1676
1677         trace_msm_gpu_suspend(0);
1678
1679         a6xx_llc_deactivate(a6xx_gpu);
1680
1681         msm_devfreq_suspend(gpu);
1682
1683         mutex_lock(&a6xx_gpu->gmu.lock);
1684         ret = a6xx_gmu_stop(a6xx_gpu);
1685         mutex_unlock(&a6xx_gpu->gmu.lock);
1686         if (ret)
1687                 return ret;
1688
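             /* Clear the RPTR shadow; the values are stale after a GPU stop */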
1689         if (a6xx_gpu->shadow_bo)
1690                 for (i = 0; i < gpu->nr_rings; i++)
1691                         a6xx_gpu->shadow[i] = 0;
1692
1693         gpu->suspend_count++;
1694
1695         return 0;
1696 }
1697
1698 static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1699 {
1700         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1701         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1702
1703         mutex_lock(&a6xx_gpu->gmu.lock);
1704
1705         /* Force the GPU power on so we can read this register */
1706         a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
1707
1708         *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO);
1709
1710         a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
1711
1712         mutex_unlock(&a6xx_gpu->gmu.lock);
1713
1714         return 0;
1715 }
1716
1717 static struct msm_ringbuffer *a6xx_active_ring(struct msm_gpu *gpu)
1718 {
1719         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1720         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1721
1722         return a6xx_gpu->cur_ring;
1723 }
1724
1725 static void a6xx_destroy(struct msm_gpu *gpu)
1726 {
1727         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1728         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1729
1730         if (a6xx_gpu->sqe_bo) {
1731                 msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->aspace);
1732                 drm_gem_object_put(a6xx_gpu->sqe_bo);
1733         }
1734
1735         if (a6xx_gpu->shadow_bo) {
1736                 msm_gem_unpin_iova(a6xx_gpu->shadow_bo, gpu->aspace);
1737                 drm_gem_object_put(a6xx_gpu->shadow_bo);
1738         }
1739
1740         a6xx_llc_slices_destroy(a6xx_gpu);
1741
1742         a6xx_gmu_remove(a6xx_gpu);
1743
1744         adreno_gpu_cleanup(adreno_gpu);
1745
1746         kfree(a6xx_gpu);
1747 }
1748
1749 static u64 a6xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
1750 {
1751         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1752         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1753         u64 busy_cycles;
1754
1755         /* 19.2MHz */
1756         *out_sample_rate = 19200000;
1757
1758         busy_cycles = gmu_read64(&a6xx_gpu->gmu,
1759                         REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
1760                         REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);
1761
1762         return busy_cycles;
1763 }
1764
1765 static void a6xx_gpu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp,
1766                               bool suspended)
1767 {
1768         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1769         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1770
1771         mutex_lock(&a6xx_gpu->gmu.lock);
1772         a6xx_gmu_set_freq(gpu, opp, suspended);
1773         mutex_unlock(&a6xx_gpu->gmu.lock);
1774 }
1775
1776 static struct msm_gem_address_space *
1777 a6xx_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev)
1778 {
1779         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1780         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1781         unsigned long quirks = 0;
1782
1783         /*
1784          * This allows the GPU to set the bus attributes required to use system
1785          * cache on behalf of the iommu page table walker.
1786          */
1787         if (!IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice))
1788                 quirks |= IO_PGTABLE_QUIRK_ARM_OUTER_WBWA;
1789
1790         return adreno_iommu_create_address_space(gpu, pdev, quirks);
1791 }
1792
1793 static struct msm_gem_address_space *
1794 a6xx_create_private_address_space(struct msm_gpu *gpu)
1795 {
1796         struct msm_mmu *mmu;
1797
1798         mmu = msm_iommu_pagetable_create(gpu->aspace->mmu);
1799
1800         if (IS_ERR(mmu))
1801                 return ERR_CAST(mmu);
1802
1803         return msm_gem_address_space_create(mmu,
1804                 "gpu", 0x100000000ULL,
1805                 adreno_private_address_space_size(gpu));
1806 }
1807
1808 static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
1809 {
1810         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1811         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1812
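             /* Use the RPTR shadow if available, else read the register */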
1813         if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami)
1814                 return a6xx_gpu->shadow[ring->id];
1815
1816         return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR);
1817 }
1818
1819 static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
1820 {
1821         struct msm_cp_state cp_state = {
1822                 .ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
1823                 .ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
1824                 .ib1_rem  = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
1825                 .ib2_rem  = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE),
1826         };
1827         bool progress;
1828
1829         /*
1830          * Adjust the remaining data to account for what has already been
1831          * fetched from memory, but not yet consumed by the SQE.
1832          *
1833          * This is not *technically* correct; the amount buffered could
1834          * exceed the IB size due to hw prefetching ahead, but:
1835          *
1836          * (1) We aren't trying to find the exact position, just whether
1837          *     progress has been made
1838          * (2) The CP_REG_TO_MEM at the end of a submit should be enough
1839          *     to prevent prefetching into an unrelated submit.  (And
1840          *     either way, at some point the ROQ will be full.)
1841          */
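             /* The buffered amount is in the upper 16 bits of the CSQ IB stats */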
1842         cp_state.ib1_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB1_STAT) >> 16;
1843         cp_state.ib2_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB2_STAT) >> 16;
1844
1845         progress = !!memcmp(&cp_state, &ring->last_cp_state, sizeof(cp_state));
1846
1847         ring->last_cp_state = cp_state;
1848
1849         return progress;
1850 }
1851
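     /*
      * Map the raw speed-bin fuse value to the bin index used by the OPP
      * tables; UINT_MAX means the fuse value is not recognized.
      */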
1852 static u32 a618_get_speed_bin(u32 fuse)
1853 {
1854         if (fuse == 0)
1855                 return 0;
1856         else if (fuse == 169)
1857                 return 1;
1858         else if (fuse == 174)
1859                 return 2;
1860
1861         return UINT_MAX;
1862 }
1863
1864 static u32 a619_get_speed_bin(u32 fuse)
1865 {
1866         if (fuse == 0)
1867                 return 0;
1868         else if (fuse == 120)
1869                 return 4;
1870         else if (fuse == 138)
1871                 return 3;
1872         else if (fuse == 169)
1873                 return 2;
1874         else if (fuse == 180)
1875                 return 1;
1876
1877         return UINT_MAX;
1878 }
1879
1880 static u32 adreno_7c3_get_speed_bin(u32 fuse)
1881 {
1882         if (fuse == 0)
1883                 return 0;
1884         else if (fuse == 117)
1885                 return 0;
1886         else if (fuse == 190)
1887                 return 1;
1888
1889         return UINT_MAX;
1890 }
1891
1892 static u32 fuse_to_supp_hw(struct device *dev, struct adreno_rev rev, u32 fuse)
1893 {
1894         u32 val = UINT_MAX;
1895
1896         if (adreno_cmp_rev(ADRENO_REV(6, 1, 8, ANY_ID), rev))
1897                 val = a618_get_speed_bin(fuse);
1898
1899         if (adreno_cmp_rev(ADRENO_REV(6, 1, 9, ANY_ID), rev))
1900                 val = a619_get_speed_bin(fuse);
1901
1902         if (adreno_cmp_rev(ADRENO_REV(6, 3, 5, ANY_ID), rev))
1903                 val = adreno_7c3_get_speed_bin(fuse);
1904
1905         if (val == UINT_MAX) {
1906                 DRM_DEV_ERROR(dev,
1907                         "missing support for speed-bin: %u. Some OPPs may not be supported by hardware\n",
1908                         fuse);
1909                 return UINT_MAX;
1910         }
1911
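             /* supported-hw is a bitmask, so turn the bin index into a bit */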
1912         return (1 << val);
1913 }
1914
1915 static int a6xx_set_supported_hw(struct device *dev, struct adreno_rev rev)
1916 {
1917         u32 supp_hw;
1918         u32 speedbin;
1919         int ret;
1920
1921         ret = adreno_read_speedbin(dev, &speedbin);
1922         /*
1923          * -ENOENT means that the platform doesn't support speedbin, which is
1924          * fine
1925          */
1926         if (ret == -ENOENT) {
1927                 return 0;
1928         } else if (ret) {
1929                 dev_err_probe(dev, ret,
1930                               "failed to read speed-bin. Some OPPs may not be supported by hardware\n");
1931                 return ret;
1932         }
1933
1934         supp_hw = fuse_to_supp_hw(dev, rev, speedbin);
1935
1936         ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1);
1937         if (ret)
1938                 return ret;
1939
1940         return 0;
1941 }
1942
1943 static const struct adreno_gpu_funcs funcs = {
1944         .base = {
1945                 .get_param = adreno_get_param,
1946                 .set_param = adreno_set_param,
1947                 .hw_init = a6xx_hw_init,
1948                 .pm_suspend = a6xx_pm_suspend,
1949                 .pm_resume = a6xx_pm_resume,
1950                 .recover = a6xx_recover,
1951                 .submit = a6xx_submit,
1952                 .active_ring = a6xx_active_ring,
1953                 .irq = a6xx_irq,
1954                 .destroy = a6xx_destroy,
1955 #if defined(CONFIG_DRM_MSM_GPU_STATE)
1956                 .show = a6xx_show,
1957 #endif
1958                 .gpu_busy = a6xx_gpu_busy,
1959                 .gpu_get_freq = a6xx_gmu_get_freq,
1960                 .gpu_set_freq = a6xx_gpu_set_freq,
1961 #if defined(CONFIG_DRM_MSM_GPU_STATE)
1962                 .gpu_state_get = a6xx_gpu_state_get,
1963                 .gpu_state_put = a6xx_gpu_state_put,
1964 #endif
1965                 .create_address_space = a6xx_create_address_space,
1966                 .create_private_address_space = a6xx_create_private_address_space,
1967                 .get_rptr = a6xx_get_rptr,
1968                 .progress = a6xx_progress,
1969         },
1970         .get_timestamp = a6xx_get_timestamp,
1971 };
1972
1973 struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
1974 {
1975         struct msm_drm_private *priv = dev->dev_private;
1976         struct platform_device *pdev = priv->gpu_pdev;
1977         struct adreno_platform_config *config = pdev->dev.platform_data;
1978         const struct adreno_info *info;
1979         struct device_node *node;
1980         struct a6xx_gpu *a6xx_gpu;
1981         struct adreno_gpu *adreno_gpu;
1982         struct msm_gpu *gpu;
1983         int ret;
1984
1985         a6xx_gpu = kzalloc(sizeof(*a6xx_gpu), GFP_KERNEL);
1986         if (!a6xx_gpu)
1987                 return ERR_PTR(-ENOMEM);
1988
1989         adreno_gpu = &a6xx_gpu->base;
1990         gpu = &adreno_gpu->base;
1991
1992         adreno_gpu->registers = NULL;
1993
1994         /*
1995          * We need to know the platform type before calling into adreno_gpu_init
1996          * so that the hw_apriv flag can be correctly set. Snoop into the info
1997          * and grab the revision number
1998          */
1999         info = adreno_info(config->rev);
2000
2001         if (info && (info->revn == 650 || info->revn == 660 ||
2002                         adreno_cmp_rev(ADRENO_REV(6, 3, 5, ANY_ID), info->rev)))
2003                 adreno_gpu->base.hw_apriv = true;
2004
2005         a6xx_llc_slices_init(pdev, a6xx_gpu);
2006
2007         ret = a6xx_set_supported_hw(&pdev->dev, config->rev);
2008         if (ret) {
2009                 a6xx_destroy(&(a6xx_gpu->base.base));
2010                 return ERR_PTR(ret);
2011         }
2012
2013         ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
2014         if (ret) {
2015                 a6xx_destroy(&(a6xx_gpu->base.base));
2016                 return ERR_PTR(ret);
2017         }
2018
2019         /*
2020          * For now only clamp to idle freq for devices where this is known not
2021          * to cause power supply issues:
2022          */
2023         if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu))
2024                 gpu->clamp_to_idle = true;
2025
2026         /* Check if there is a GMU phandle and set it up */
2027         node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0);
2028
2029         /* FIXME: How do we gracefully handle this? */
2030         BUG_ON(!node);
2031
2032         ret = a6xx_gmu_init(a6xx_gpu, node);
2033         of_node_put(node);
2034         if (ret) {
2035                 a6xx_destroy(&(a6xx_gpu->base.base));
2036                 return ERR_PTR(ret);
2037         }
2038
2039         if (gpu->aspace)
2040                 msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu,
2041                                 a6xx_fault_handler);
2042
2043         return gpu;
2044 }