Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm...
[sfrench/cifs-2.6.git] / drivers / gpu / drm / radeon / cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 #include "radeon_kfd.h"
37
/*
 * Firmware image declarations for all CIK-family ASICs this file drives.
 * Two naming schemes are declared per chip: the legacy all-caps names and
 * the newer lowercase names — presumably so either firmware package layout
 * satisfies the loader; NOTE(review): confirm which scheme is tried first
 * against the *_init_microcode() code elsewhere in this file.
 */

/* Bonaire (legacy names) */
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

/* Bonaire (new names) */
MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");

/* Hawaii (legacy names) */
MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

/* Hawaii (new names) */
MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");

/* Kaveri (legacy names; APU, so no MC/SMC images) */
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

/* Kaveri (new names; note the additional mec2 image) */
MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

/* Kabini (legacy names) */
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

/* Kabini (new names) */
MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

/* Mullins (legacy names) */
MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

/* Mullins (new names) */
MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");
118
/* Helpers implemented in other files of the radeon driver (r600/evergreen/
 * sumo/si/cik_sdma/vce code, judging by the prefixes — shared across ASIC
 * generations). */
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
/* File-local forward declarations (definitions not visible in this chunk). */
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);
143
144 /* get temperature in millidegrees */
145 int ci_get_temp(struct radeon_device *rdev)
146 {
147         u32 temp;
148         int actual_temp = 0;
149
150         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
151                 CTF_TEMP_SHIFT;
152
153         if (temp & 0x200)
154                 actual_temp = 255;
155         else
156                 actual_temp = temp & 0x1ff;
157
158         actual_temp = actual_temp * 1000;
159
160         return actual_temp;
161 }
162
163 /* get temperature in millidegrees */
164 int kv_get_temp(struct radeon_device *rdev)
165 {
166         u32 temp;
167         int actual_temp = 0;
168
169         temp = RREG32_SMC(0xC0300E0C);
170
171         if (temp)
172                 actual_temp = (temp / 8) - 49;
173         else
174                 actual_temp = 0;
175
176         actual_temp = actual_temp * 1000;
177
178         return actual_temp;
179 }
180
/*
 * Indirect registers accessor
 */
/*
 * cik_pciep_rreg - read a PCIE port indirect register
 * @rdev: radeon device
 * @reg:  indirect register offset
 *
 * Uses the PCIE_INDEX/PCIE_DATA index-data pair; the whole sequence is
 * serialized with pciep_idx_lock (irqsave) so a concurrent accessor or
 * interrupt cannot retarget the index between the write and the data read.
 *
 * Returns the 32-bit register value.
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* read back to post the index write */
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}
196
/*
 * cik_pciep_wreg - write a PCIE port indirect register
 * @rdev: radeon device
 * @reg:  indirect register offset
 * @v:    value to write
 *
 * Counterpart of cik_pciep_rreg(): selects @reg through PCIE_INDEX and
 * writes @v through PCIE_DATA under pciep_idx_lock. Both the index write
 * and the data write are read back to post them before the lock drops.
 */
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* read back to post the index write */
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);	/* read back to post the data write */
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
208
/*
 * RLC save/restore register list for Spectre (Kaveri) parts.
 *
 * Most entries come in pairs: a selector/offset word of the form
 *   (instance-select << 16) | (register byte offset >> 2)
 * followed by a 0x00000000 placeholder for the saved value. The high
 * halfword appears to pick the GRBM broadcast/instance target (0x0e00 for
 * most registers, 0x4e00..0xbe00 for per-instance copies of the same
 * offset) — NOTE(review): confirm against the RLC microcode format.
 * NOTE(review): the bare values 0x3 and 0x5 embedded in the list look
 * like section/count markers interpreted by the RLC ucode; the trailing
 * 0x5 group has no value placeholders. Do not reorder or edit entries
 * without the ucode spec.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
655
656 static const u32 kalindi_rlc_save_restore_register_list[] =
657 {
658         (0x0e00 << 16) | (0xc12c >> 2),
659         0x00000000,
660         (0x0e00 << 16) | (0xc140 >> 2),
661         0x00000000,
662         (0x0e00 << 16) | (0xc150 >> 2),
663         0x00000000,
664         (0x0e00 << 16) | (0xc15c >> 2),
665         0x00000000,
666         (0x0e00 << 16) | (0xc168 >> 2),
667         0x00000000,
668         (0x0e00 << 16) | (0xc170 >> 2),
669         0x00000000,
670         (0x0e00 << 16) | (0xc204 >> 2),
671         0x00000000,
672         (0x0e00 << 16) | (0xc2b4 >> 2),
673         0x00000000,
674         (0x0e00 << 16) | (0xc2b8 >> 2),
675         0x00000000,
676         (0x0e00 << 16) | (0xc2bc >> 2),
677         0x00000000,
678         (0x0e00 << 16) | (0xc2c0 >> 2),
679         0x00000000,
680         (0x0e00 << 16) | (0x8228 >> 2),
681         0x00000000,
682         (0x0e00 << 16) | (0x829c >> 2),
683         0x00000000,
684         (0x0e00 << 16) | (0x869c >> 2),
685         0x00000000,
686         (0x0600 << 16) | (0x98f4 >> 2),
687         0x00000000,
688         (0x0e00 << 16) | (0x98f8 >> 2),
689         0x00000000,
690         (0x0e00 << 16) | (0x9900 >> 2),
691         0x00000000,
692         (0x0e00 << 16) | (0xc260 >> 2),
693         0x00000000,
694         (0x0e00 << 16) | (0x90e8 >> 2),
695         0x00000000,
696         (0x0e00 << 16) | (0x3c000 >> 2),
697         0x00000000,
698         (0x0e00 << 16) | (0x3c00c >> 2),
699         0x00000000,
700         (0x0e00 << 16) | (0x8c1c >> 2),
701         0x00000000,
702         (0x0e00 << 16) | (0x9700 >> 2),
703         0x00000000,
704         (0x0e00 << 16) | (0xcd20 >> 2),
705         0x00000000,
706         (0x4e00 << 16) | (0xcd20 >> 2),
707         0x00000000,
708         (0x5e00 << 16) | (0xcd20 >> 2),
709         0x00000000,
710         (0x6e00 << 16) | (0xcd20 >> 2),
711         0x00000000,
712         (0x7e00 << 16) | (0xcd20 >> 2),
713         0x00000000,
714         (0x0e00 << 16) | (0x89bc >> 2),
715         0x00000000,
716         (0x0e00 << 16) | (0x8900 >> 2),
717         0x00000000,
718         0x3,
719         (0x0e00 << 16) | (0xc130 >> 2),
720         0x00000000,
721         (0x0e00 << 16) | (0xc134 >> 2),
722         0x00000000,
723         (0x0e00 << 16) | (0xc1fc >> 2),
724         0x00000000,
725         (0x0e00 << 16) | (0xc208 >> 2),
726         0x00000000,
727         (0x0e00 << 16) | (0xc264 >> 2),
728         0x00000000,
729         (0x0e00 << 16) | (0xc268 >> 2),
730         0x00000000,
731         (0x0e00 << 16) | (0xc26c >> 2),
732         0x00000000,
733         (0x0e00 << 16) | (0xc270 >> 2),
734         0x00000000,
735         (0x0e00 << 16) | (0xc274 >> 2),
736         0x00000000,
737         (0x0e00 << 16) | (0xc28c >> 2),
738         0x00000000,
739         (0x0e00 << 16) | (0xc290 >> 2),
740         0x00000000,
741         (0x0e00 << 16) | (0xc294 >> 2),
742         0x00000000,
743         (0x0e00 << 16) | (0xc298 >> 2),
744         0x00000000,
745         (0x0e00 << 16) | (0xc2a0 >> 2),
746         0x00000000,
747         (0x0e00 << 16) | (0xc2a4 >> 2),
748         0x00000000,
749         (0x0e00 << 16) | (0xc2a8 >> 2),
750         0x00000000,
751         (0x0e00 << 16) | (0xc2ac >> 2),
752         0x00000000,
753         (0x0e00 << 16) | (0x301d0 >> 2),
754         0x00000000,
755         (0x0e00 << 16) | (0x30238 >> 2),
756         0x00000000,
757         (0x0e00 << 16) | (0x30250 >> 2),
758         0x00000000,
759         (0x0e00 << 16) | (0x30254 >> 2),
760         0x00000000,
761         (0x0e00 << 16) | (0x30258 >> 2),
762         0x00000000,
763         (0x0e00 << 16) | (0x3025c >> 2),
764         0x00000000,
765         (0x4e00 << 16) | (0xc900 >> 2),
766         0x00000000,
767         (0x5e00 << 16) | (0xc900 >> 2),
768         0x00000000,
769         (0x6e00 << 16) | (0xc900 >> 2),
770         0x00000000,
771         (0x7e00 << 16) | (0xc900 >> 2),
772         0x00000000,
773         (0x4e00 << 16) | (0xc904 >> 2),
774         0x00000000,
775         (0x5e00 << 16) | (0xc904 >> 2),
776         0x00000000,
777         (0x6e00 << 16) | (0xc904 >> 2),
778         0x00000000,
779         (0x7e00 << 16) | (0xc904 >> 2),
780         0x00000000,
781         (0x4e00 << 16) | (0xc908 >> 2),
782         0x00000000,
783         (0x5e00 << 16) | (0xc908 >> 2),
784         0x00000000,
785         (0x6e00 << 16) | (0xc908 >> 2),
786         0x00000000,
787         (0x7e00 << 16) | (0xc908 >> 2),
788         0x00000000,
789         (0x4e00 << 16) | (0xc90c >> 2),
790         0x00000000,
791         (0x5e00 << 16) | (0xc90c >> 2),
792         0x00000000,
793         (0x6e00 << 16) | (0xc90c >> 2),
794         0x00000000,
795         (0x7e00 << 16) | (0xc90c >> 2),
796         0x00000000,
797         (0x4e00 << 16) | (0xc910 >> 2),
798         0x00000000,
799         (0x5e00 << 16) | (0xc910 >> 2),
800         0x00000000,
801         (0x6e00 << 16) | (0xc910 >> 2),
802         0x00000000,
803         (0x7e00 << 16) | (0xc910 >> 2),
804         0x00000000,
805         (0x0e00 << 16) | (0xc99c >> 2),
806         0x00000000,
807         (0x0e00 << 16) | (0x9834 >> 2),
808         0x00000000,
809         (0x0000 << 16) | (0x30f00 >> 2),
810         0x00000000,
811         (0x0000 << 16) | (0x30f04 >> 2),
812         0x00000000,
813         (0x0000 << 16) | (0x30f08 >> 2),
814         0x00000000,
815         (0x0000 << 16) | (0x30f0c >> 2),
816         0x00000000,
817         (0x0600 << 16) | (0x9b7c >> 2),
818         0x00000000,
819         (0x0e00 << 16) | (0x8a14 >> 2),
820         0x00000000,
821         (0x0e00 << 16) | (0x8a18 >> 2),
822         0x00000000,
823         (0x0600 << 16) | (0x30a00 >> 2),
824         0x00000000,
825         (0x0e00 << 16) | (0x8bf0 >> 2),
826         0x00000000,
827         (0x0e00 << 16) | (0x8bcc >> 2),
828         0x00000000,
829         (0x0e00 << 16) | (0x8b24 >> 2),
830         0x00000000,
831         (0x0e00 << 16) | (0x30a04 >> 2),
832         0x00000000,
833         (0x0600 << 16) | (0x30a10 >> 2),
834         0x00000000,
835         (0x0600 << 16) | (0x30a14 >> 2),
836         0x00000000,
837         (0x0600 << 16) | (0x30a18 >> 2),
838         0x00000000,
839         (0x0600 << 16) | (0x30a2c >> 2),
840         0x00000000,
841         (0x0e00 << 16) | (0xc700 >> 2),
842         0x00000000,
843         (0x0e00 << 16) | (0xc704 >> 2),
844         0x00000000,
845         (0x0e00 << 16) | (0xc708 >> 2),
846         0x00000000,
847         (0x0e00 << 16) | (0xc768 >> 2),
848         0x00000000,
849         (0x0400 << 16) | (0xc770 >> 2),
850         0x00000000,
851         (0x0400 << 16) | (0xc774 >> 2),
852         0x00000000,
853         (0x0400 << 16) | (0xc798 >> 2),
854         0x00000000,
855         (0x0400 << 16) | (0xc79c >> 2),
856         0x00000000,
857         (0x0e00 << 16) | (0x9100 >> 2),
858         0x00000000,
859         (0x0e00 << 16) | (0x3c010 >> 2),
860         0x00000000,
861         (0x0e00 << 16) | (0x8c00 >> 2),
862         0x00000000,
863         (0x0e00 << 16) | (0x8c04 >> 2),
864         0x00000000,
865         (0x0e00 << 16) | (0x8c20 >> 2),
866         0x00000000,
867         (0x0e00 << 16) | (0x8c38 >> 2),
868         0x00000000,
869         (0x0e00 << 16) | (0x8c3c >> 2),
870         0x00000000,
871         (0x0e00 << 16) | (0xae00 >> 2),
872         0x00000000,
873         (0x0e00 << 16) | (0x9604 >> 2),
874         0x00000000,
875         (0x0e00 << 16) | (0xac08 >> 2),
876         0x00000000,
877         (0x0e00 << 16) | (0xac0c >> 2),
878         0x00000000,
879         (0x0e00 << 16) | (0xac10 >> 2),
880         0x00000000,
881         (0x0e00 << 16) | (0xac14 >> 2),
882         0x00000000,
883         (0x0e00 << 16) | (0xac58 >> 2),
884         0x00000000,
885         (0x0e00 << 16) | (0xac68 >> 2),
886         0x00000000,
887         (0x0e00 << 16) | (0xac6c >> 2),
888         0x00000000,
889         (0x0e00 << 16) | (0xac70 >> 2),
890         0x00000000,
891         (0x0e00 << 16) | (0xac74 >> 2),
892         0x00000000,
893         (0x0e00 << 16) | (0xac78 >> 2),
894         0x00000000,
895         (0x0e00 << 16) | (0xac7c >> 2),
896         0x00000000,
897         (0x0e00 << 16) | (0xac80 >> 2),
898         0x00000000,
899         (0x0e00 << 16) | (0xac84 >> 2),
900         0x00000000,
901         (0x0e00 << 16) | (0xac88 >> 2),
902         0x00000000,
903         (0x0e00 << 16) | (0xac8c >> 2),
904         0x00000000,
905         (0x0e00 << 16) | (0x970c >> 2),
906         0x00000000,
907         (0x0e00 << 16) | (0x9714 >> 2),
908         0x00000000,
909         (0x0e00 << 16) | (0x9718 >> 2),
910         0x00000000,
911         (0x0e00 << 16) | (0x971c >> 2),
912         0x00000000,
913         (0x0e00 << 16) | (0x31068 >> 2),
914         0x00000000,
915         (0x4e00 << 16) | (0x31068 >> 2),
916         0x00000000,
917         (0x5e00 << 16) | (0x31068 >> 2),
918         0x00000000,
919         (0x6e00 << 16) | (0x31068 >> 2),
920         0x00000000,
921         (0x7e00 << 16) | (0x31068 >> 2),
922         0x00000000,
923         (0x0e00 << 16) | (0xcd10 >> 2),
924         0x00000000,
925         (0x0e00 << 16) | (0xcd14 >> 2),
926         0x00000000,
927         (0x0e00 << 16) | (0x88b0 >> 2),
928         0x00000000,
929         (0x0e00 << 16) | (0x88b4 >> 2),
930         0x00000000,
931         (0x0e00 << 16) | (0x88b8 >> 2),
932         0x00000000,
933         (0x0e00 << 16) | (0x88bc >> 2),
934         0x00000000,
935         (0x0400 << 16) | (0x89c0 >> 2),
936         0x00000000,
937         (0x0e00 << 16) | (0x88c4 >> 2),
938         0x00000000,
939         (0x0e00 << 16) | (0x88c8 >> 2),
940         0x00000000,
941         (0x0e00 << 16) | (0x88d0 >> 2),
942         0x00000000,
943         (0x0e00 << 16) | (0x88d4 >> 2),
944         0x00000000,
945         (0x0e00 << 16) | (0x88d8 >> 2),
946         0x00000000,
947         (0x0e00 << 16) | (0x8980 >> 2),
948         0x00000000,
949         (0x0e00 << 16) | (0x30938 >> 2),
950         0x00000000,
951         (0x0e00 << 16) | (0x3093c >> 2),
952         0x00000000,
953         (0x0e00 << 16) | (0x30940 >> 2),
954         0x00000000,
955         (0x0e00 << 16) | (0x89a0 >> 2),
956         0x00000000,
957         (0x0e00 << 16) | (0x30900 >> 2),
958         0x00000000,
959         (0x0e00 << 16) | (0x30904 >> 2),
960         0x00000000,
961         (0x0e00 << 16) | (0x89b4 >> 2),
962         0x00000000,
963         (0x0e00 << 16) | (0x3e1fc >> 2),
964         0x00000000,
965         (0x0e00 << 16) | (0x3c210 >> 2),
966         0x00000000,
967         (0x0e00 << 16) | (0x3c214 >> 2),
968         0x00000000,
969         (0x0e00 << 16) | (0x3c218 >> 2),
970         0x00000000,
971         (0x0e00 << 16) | (0x8904 >> 2),
972         0x00000000,
973         0x5,
974         (0x0e00 << 16) | (0x8c28 >> 2),
975         (0x0e00 << 16) | (0x8c2c >> 2),
976         (0x0e00 << 16) | (0x8c30 >> 2),
977         (0x0e00 << 16) | (0x8c34 >> 2),
978         (0x0e00 << 16) | (0x9600 >> 2),
979 };
980
/* Bonaire SPM golden settings; entries appear to be {offset, mask, value}
 * triples — confirm against radeon_program_register_sequence(), which
 * consumes this table in cik_init_golden_registers().
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
985
/* Bonaire common golden settings ({offset, mask, value} triples — presumed;
 * see radeon_program_register_sequence()), applied by
 * cik_init_golden_registers().
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
993
/* Bonaire per-ASIC golden settings ({offset, mask, value} triples — presumed;
 * see radeon_program_register_sequence()), applied by
 * cik_init_golden_registers().
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1038
/* Bonaire clock-gating init sequence (name suggests MGCG/CGCG setup —
 * confirm); {offset, mask, value} triples — presumed layout, see
 * radeon_program_register_sequence(). Applied first by
 * cik_init_golden_registers().
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1124
/* Spectre (Kaveri) SPM golden settings ({offset, mask, value} triples —
 * presumed; see radeon_program_register_sequence()), applied by
 * cik_init_golden_registers().
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1129
/* Spectre (Kaveri) common golden settings ({offset, mask, value} triples —
 * presumed; see radeon_program_register_sequence()), applied by
 * cik_init_golden_registers().
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1137
/* Spectre (Kaveri) per-ASIC golden settings ({offset, mask, value} triples —
 * presumed; see radeon_program_register_sequence()), applied by
 * cik_init_golden_registers().
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1166
/* Spectre (Kaveri) clock-gating init sequence (name suggests MGCG/CGCG
 * setup — confirm); {offset, mask, value} triples — presumed layout, see
 * radeon_program_register_sequence(). Applied first by
 * cik_init_golden_registers().
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1257
/* Kalindi (Kabini) SPM golden settings ({offset, mask, value} triples —
 * presumed; see radeon_program_register_sequence()), applied by
 * cik_init_golden_registers(); also reused for Mullins.
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1262
/* Kalindi (Kabini) common golden settings ({offset, mask, value} triples —
 * presumed; see radeon_program_register_sequence()), applied by
 * cik_init_golden_registers(); also reused for Mullins.
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1270
/* Kalindi (Kabini) per-ASIC golden settings ({offset, mask, value} triples —
 * presumed; see radeon_program_register_sequence()), applied by
 * cik_init_golden_registers().
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1304
/* Kalindi (Kabini) clock-gating init sequence (name suggests MGCG/CGCG
 * setup — confirm); {offset, mask, value} triples — presumed layout, see
 * radeon_program_register_sequence(). Applied first by
 * cik_init_golden_registers(); also reused for Mullins.
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1363
/* Hawaii SPM golden settings ({offset, mask, value} triples — presumed;
 * see radeon_program_register_sequence()), applied by
 * cik_init_golden_registers().
 */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1368
/* Hawaii common golden settings ({offset, mask, value} triples — presumed;
 * see radeon_program_register_sequence()), applied by
 * cik_init_golden_registers().
 */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1377
/* Hawaii per-ASIC golden settings ({offset, mask, value} triples — presumed;
 * see radeon_program_register_sequence()), applied by
 * cik_init_golden_registers().
 */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1417
/* Hawaii clock-gating init sequence (name suggests MGCG/CGCG setup —
 * confirm); {offset, mask, value} triples — presumed layout, see
 * radeon_program_register_sequence(). Applied first by
 * cik_init_golden_registers().
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1528
/* Godavari (Mullins) per-ASIC golden settings ({offset, mask, value}
 * triples — presumed; see radeon_program_register_sequence()), applied by
 * cik_init_golden_registers().
 */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/* NOTE(review): 0x98302 looks like a typo of 0x9834 (the kalindi table
	 * has 0x9834 with the same mask/value) — confirm against register map
	 * before changing hardware-programming data.
	 */
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1564
1565
1566 static void cik_init_golden_registers(struct radeon_device *rdev)
1567 {
1568         /* Some of the registers might be dependent on GRBM_GFX_INDEX */
1569         mutex_lock(&rdev->grbm_idx_mutex);
1570         switch (rdev->family) {
1571         case CHIP_BONAIRE:
1572                 radeon_program_register_sequence(rdev,
1573                                                  bonaire_mgcg_cgcg_init,
1574                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1575                 radeon_program_register_sequence(rdev,
1576                                                  bonaire_golden_registers,
1577                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1578                 radeon_program_register_sequence(rdev,
1579                                                  bonaire_golden_common_registers,
1580                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1581                 radeon_program_register_sequence(rdev,
1582                                                  bonaire_golden_spm_registers,
1583                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1584                 break;
1585         case CHIP_KABINI:
1586                 radeon_program_register_sequence(rdev,
1587                                                  kalindi_mgcg_cgcg_init,
1588                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1589                 radeon_program_register_sequence(rdev,
1590                                                  kalindi_golden_registers,
1591                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1592                 radeon_program_register_sequence(rdev,
1593                                                  kalindi_golden_common_registers,
1594                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1595                 radeon_program_register_sequence(rdev,
1596                                                  kalindi_golden_spm_registers,
1597                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1598                 break;
1599         case CHIP_MULLINS:
1600                 radeon_program_register_sequence(rdev,
1601                                                  kalindi_mgcg_cgcg_init,
1602                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1603                 radeon_program_register_sequence(rdev,
1604                                                  godavari_golden_registers,
1605                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1606                 radeon_program_register_sequence(rdev,
1607                                                  kalindi_golden_common_registers,
1608                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1609                 radeon_program_register_sequence(rdev,
1610                                                  kalindi_golden_spm_registers,
1611                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1612                 break;
1613         case CHIP_KAVERI:
1614                 radeon_program_register_sequence(rdev,
1615                                                  spectre_mgcg_cgcg_init,
1616                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1617                 radeon_program_register_sequence(rdev,
1618                                                  spectre_golden_registers,
1619                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1620                 radeon_program_register_sequence(rdev,
1621                                                  spectre_golden_common_registers,
1622                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1623                 radeon_program_register_sequence(rdev,
1624                                                  spectre_golden_spm_registers,
1625                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1626                 break;
1627         case CHIP_HAWAII:
1628                 radeon_program_register_sequence(rdev,
1629                                                  hawaii_mgcg_cgcg_init,
1630                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1631                 radeon_program_register_sequence(rdev,
1632                                                  hawaii_golden_registers,
1633                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1634                 radeon_program_register_sequence(rdev,
1635                                                  hawaii_golden_common_registers,
1636                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1637                 radeon_program_register_sequence(rdev,
1638                                                  hawaii_golden_spm_registers,
1639                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1640                 break;
1641         default:
1642                 break;
1643         }
1644         mutex_unlock(&rdev->grbm_idx_mutex);
1645 }
1646
1647 /**
1648  * cik_get_xclk - get the xclk
1649  *
1650  * @rdev: radeon_device pointer
1651  *
1652  * Returns the reference clock used by the gfx engine
1653  * (CIK).
1654  */
1655 u32 cik_get_xclk(struct radeon_device *rdev)
1656 {
1657         u32 reference_clock = rdev->clock.spll.reference_freq;
1658
1659         if (rdev->flags & RADEON_IS_IGP) {
1660                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1661                         return reference_clock / 2;
1662         } else {
1663                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1664                         return reference_clock / 4;
1665         }
1666         return reference_clock;
1667 }
1668
1669 /**
1670  * cik_mm_rdoorbell - read a doorbell dword
1671  *
1672  * @rdev: radeon_device pointer
1673  * @index: doorbell index
1674  *
1675  * Returns the value in the doorbell aperture at the
1676  * requested doorbell index (CIK).
1677  */
1678 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1679 {
1680         if (index < rdev->doorbell.num_doorbells) {
1681                 return readl(rdev->doorbell.ptr + index);
1682         } else {
1683                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1684                 return 0;
1685         }
1686 }
1687
1688 /**
1689  * cik_mm_wdoorbell - write a doorbell dword
1690  *
1691  * @rdev: radeon_device pointer
1692  * @index: doorbell index
1693  * @v: value to write
1694  *
1695  * Writes @v to the doorbell aperture at the
1696  * requested doorbell index (CIK).
1697  */
1698 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1699 {
1700         if (index < rdev->doorbell.num_doorbells) {
1701                 writel(v, rdev->doorbell.ptr + index);
1702         } else {
1703                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1704         }
1705 }
1706
#define BONAIRE_IO_MC_REGS_SIZE 36

/* {MC_SEQ_IO_DEBUG index, data} pairs programmed into the MC before
 * uploading the legacy (headerless) Bonaire MC ucode; consumed by
 * ci_mc_load_microcode() when rdev->new_fw is not set. */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1748
#define HAWAII_IO_MC_REGS_SIZE 22

/* {MC_SEQ_IO_DEBUG index, data} pairs programmed into the MC before
 * uploading the legacy (headerless) Hawaii MC ucode; consumed by
 * ci_mc_load_microcode() when rdev->new_fw is not set. */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1776
1777
1778 /**
1779  * cik_srbm_select - select specific register instances
1780  *
1781  * @rdev: radeon_device pointer
1782  * @me: selected ME (micro engine)
1783  * @pipe: pipe
1784  * @queue: queue
1785  * @vmid: VMID
1786  *
1787  * Switches the currently active registers instances.  Some
1788  * registers are instanced per VMID, others are instanced per
1789  * me/pipe/queue combination.
1790  */
1791 static void cik_srbm_select(struct radeon_device *rdev,
1792                             u32 me, u32 pipe, u32 queue, u32 vmid)
1793 {
1794         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1795                              MEID(me & 0x3) |
1796                              VMID(vmid & 0xf) |
1797                              QUEUEID(queue & 0x7));
1798         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1799 }
1800
1801 /* ucode loading */
1802 /**
1803  * ci_mc_load_microcode - load MC ucode into the hw
1804  *
1805  * @rdev: radeon_device pointer
1806  *
1807  * Load the GDDR MC ucode into the hw (CIK).
1808  * Returns 0 on success, error on failure.
1809  */
1810 int ci_mc_load_microcode(struct radeon_device *rdev)
1811 {
1812         const __be32 *fw_data = NULL;
1813         const __le32 *new_fw_data = NULL;
1814         u32 running, blackout = 0, tmp;
1815         u32 *io_mc_regs = NULL;
1816         const __le32 *new_io_mc_regs = NULL;
1817         int i, regs_size, ucode_size;
1818
1819         if (!rdev->mc_fw)
1820                 return -EINVAL;
1821
1822         if (rdev->new_fw) {
1823                 const struct mc_firmware_header_v1_0 *hdr =
1824                         (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1825
1826                 radeon_ucode_print_mc_hdr(&hdr->header);
1827
1828                 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1829                 new_io_mc_regs = (const __le32 *)
1830                         (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1831                 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1832                 new_fw_data = (const __le32 *)
1833                         (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1834         } else {
1835                 ucode_size = rdev->mc_fw->size / 4;
1836
1837                 switch (rdev->family) {
1838                 case CHIP_BONAIRE:
1839                         io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1840                         regs_size = BONAIRE_IO_MC_REGS_SIZE;
1841                         break;
1842                 case CHIP_HAWAII:
1843                         io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1844                         regs_size = HAWAII_IO_MC_REGS_SIZE;
1845                         break;
1846                 default:
1847                         return -EINVAL;
1848                 }
1849                 fw_data = (const __be32 *)rdev->mc_fw->data;
1850         }
1851
1852         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1853
1854         if (running == 0) {
1855                 if (running) {
1856                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1857                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1858                 }
1859
1860                 /* reset the engine and set to writable */
1861                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1862                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1863
1864                 /* load mc io regs */
1865                 for (i = 0; i < regs_size; i++) {
1866                         if (rdev->new_fw) {
1867                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1868                                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1869                         } else {
1870                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1871                                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1872                         }
1873                 }
1874
1875                 tmp = RREG32(MC_SEQ_MISC0);
1876                 if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1877                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1878                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1879                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1880                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1881                 }
1882
1883                 /* load the MC ucode */
1884                 for (i = 0; i < ucode_size; i++) {
1885                         if (rdev->new_fw)
1886                                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1887                         else
1888                                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1889                 }
1890
1891                 /* put the engine back into the active state */
1892                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1893                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1894                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1895
1896                 /* wait for training to complete */
1897                 for (i = 0; i < rdev->usec_timeout; i++) {
1898                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1899                                 break;
1900                         udelay(1);
1901                 }
1902                 for (i = 0; i < rdev->usec_timeout; i++) {
1903                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1904                                 break;
1905                         udelay(1);
1906                 }
1907
1908                 if (running)
1909                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1910         }
1911
1912         return 0;
1913 }
1914
1915 /**
1916  * cik_init_microcode - load ucode images from disk
1917  *
1918  * @rdev: radeon_device pointer
1919  *
1920  * Use the firmware interface to load the ucode images into
1921  * the driver (not loaded into hw).
1922  * Returns 0 on success, error on failure.
1923  */
1924 static int cik_init_microcode(struct radeon_device *rdev)
1925 {
1926         const char *chip_name;
1927         const char *new_chip_name;
1928         size_t pfp_req_size, me_req_size, ce_req_size,
1929                 mec_req_size, rlc_req_size, mc_req_size = 0,
1930                 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1931         char fw_name[30];
1932         int new_fw = 0;
1933         int err;
1934         int num_fw;
1935
1936         DRM_DEBUG("\n");
1937
1938         switch (rdev->family) {
1939         case CHIP_BONAIRE:
1940                 chip_name = "BONAIRE";
1941                 new_chip_name = "bonaire";
1942                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1943                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1944                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1945                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1946                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1947                 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1948                 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1949                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1950                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1951                 num_fw = 8;
1952                 break;
1953         case CHIP_HAWAII:
1954                 chip_name = "HAWAII";
1955                 new_chip_name = "hawaii";
1956                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1957                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1958                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1959                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1960                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1961                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1962                 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1963                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1964                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1965                 num_fw = 8;
1966                 break;
1967         case CHIP_KAVERI:
1968                 chip_name = "KAVERI";
1969                 new_chip_name = "kaveri";
1970                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1971                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1972                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1973                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1974                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1975                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1976                 num_fw = 7;
1977                 break;
1978         case CHIP_KABINI:
1979                 chip_name = "KABINI";
1980                 new_chip_name = "kabini";
1981                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1982                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1983                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1984                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1985                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1986                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1987                 num_fw = 6;
1988                 break;
1989         case CHIP_MULLINS:
1990                 chip_name = "MULLINS";
1991                 new_chip_name = "mullins";
1992                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1993                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1994                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1995                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1996                 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1997                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1998                 num_fw = 6;
1999                 break;
2000         default: BUG();
2001         }
2002
2003         DRM_INFO("Loading %s Microcode\n", new_chip_name);
2004
2005         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2006         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2007         if (err) {
2008                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2009                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2010                 if (err)
2011                         goto out;
2012                 if (rdev->pfp_fw->size != pfp_req_size) {
2013                         printk(KERN_ERR
2014                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2015                                rdev->pfp_fw->size, fw_name);
2016                         err = -EINVAL;
2017                         goto out;
2018                 }
2019         } else {
2020                 err = radeon_ucode_validate(rdev->pfp_fw);
2021                 if (err) {
2022                         printk(KERN_ERR
2023                                "cik_fw: validation failed for firmware \"%s\"\n",
2024                                fw_name);
2025                         goto out;
2026                 } else {
2027                         new_fw++;
2028                 }
2029         }
2030
2031         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2032         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2033         if (err) {
2034                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2035                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2036                 if (err)
2037                         goto out;
2038                 if (rdev->me_fw->size != me_req_size) {
2039                         printk(KERN_ERR
2040                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2041                                rdev->me_fw->size, fw_name);
2042                         err = -EINVAL;
2043                 }
2044         } else {
2045                 err = radeon_ucode_validate(rdev->me_fw);
2046                 if (err) {
2047                         printk(KERN_ERR
2048                                "cik_fw: validation failed for firmware \"%s\"\n",
2049                                fw_name);
2050                         goto out;
2051                 } else {
2052                         new_fw++;
2053                 }
2054         }
2055
2056         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2057         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2058         if (err) {
2059                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2060                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2061                 if (err)
2062                         goto out;
2063                 if (rdev->ce_fw->size != ce_req_size) {
2064                         printk(KERN_ERR
2065                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2066                                rdev->ce_fw->size, fw_name);
2067                         err = -EINVAL;
2068                 }
2069         } else {
2070                 err = radeon_ucode_validate(rdev->ce_fw);
2071                 if (err) {
2072                         printk(KERN_ERR
2073                                "cik_fw: validation failed for firmware \"%s\"\n",
2074                                fw_name);
2075                         goto out;
2076                 } else {
2077                         new_fw++;
2078                 }
2079         }
2080
2081         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2082         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2083         if (err) {
2084                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2085                 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2086                 if (err)
2087                         goto out;
2088                 if (rdev->mec_fw->size != mec_req_size) {
2089                         printk(KERN_ERR
2090                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2091                                rdev->mec_fw->size, fw_name);
2092                         err = -EINVAL;
2093                 }
2094         } else {
2095                 err = radeon_ucode_validate(rdev->mec_fw);
2096                 if (err) {
2097                         printk(KERN_ERR
2098                                "cik_fw: validation failed for firmware \"%s\"\n",
2099                                fw_name);
2100                         goto out;
2101                 } else {
2102                         new_fw++;
2103                 }
2104         }
2105
2106         if (rdev->family == CHIP_KAVERI) {
2107                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2108                 err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2109                 if (err) {
2110                         goto out;
2111                 } else {
2112                         err = radeon_ucode_validate(rdev->mec2_fw);
2113                         if (err) {
2114                                 goto out;
2115                         } else {
2116                                 new_fw++;
2117                         }
2118                 }
2119         }
2120
2121         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2122         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2123         if (err) {
2124                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2125                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2126                 if (err)
2127                         goto out;
2128                 if (rdev->rlc_fw->size != rlc_req_size) {
2129                         printk(KERN_ERR
2130                                "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2131                                rdev->rlc_fw->size, fw_name);
2132                         err = -EINVAL;
2133                 }
2134         } else {
2135                 err = radeon_ucode_validate(rdev->rlc_fw);
2136                 if (err) {
2137                         printk(KERN_ERR
2138                                "cik_fw: validation failed for firmware \"%s\"\n",
2139                                fw_name);
2140                         goto out;
2141                 } else {
2142                         new_fw++;
2143                 }
2144         }
2145
2146         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2147         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2148         if (err) {
2149                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2150                 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2151                 if (err)
2152                         goto out;
2153                 if (rdev->sdma_fw->size != sdma_req_size) {
2154                         printk(KERN_ERR
2155                                "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2156                                rdev->sdma_fw->size, fw_name);
2157                         err = -EINVAL;
2158                 }
2159         } else {
2160                 err = radeon_ucode_validate(rdev->sdma_fw);
2161                 if (err) {
2162                         printk(KERN_ERR
2163                                "cik_fw: validation failed for firmware \"%s\"\n",
2164                                fw_name);
2165                         goto out;
2166                 } else {
2167                         new_fw++;
2168                 }
2169         }
2170
2171         /* No SMC, MC ucode on APUs */
2172         if (!(rdev->flags & RADEON_IS_IGP)) {
2173                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2174                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2175                 if (err) {
2176                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2177                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2178                         if (err) {
2179                                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2180                                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2181                                 if (err)
2182                                         goto out;
2183                         }
2184                         if ((rdev->mc_fw->size != mc_req_size) &&
2185                             (rdev->mc_fw->size != mc2_req_size)){
2186                                 printk(KERN_ERR
2187                                        "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2188                                        rdev->mc_fw->size, fw_name);
2189                                 err = -EINVAL;
2190                         }
2191                         DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2192                 } else {
2193                         err = radeon_ucode_validate(rdev->mc_fw);
2194                         if (err) {
2195                                 printk(KERN_ERR
2196                                        "cik_fw: validation failed for firmware \"%s\"\n",
2197                                        fw_name);
2198                                 goto out;
2199                         } else {
2200                                 new_fw++;
2201                         }
2202                 }
2203
2204                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2205                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2206                 if (err) {
2207                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2208                         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2209                         if (err) {
2210                                 printk(KERN_ERR
2211                                        "smc: error loading firmware \"%s\"\n",
2212                                        fw_name);
2213                                 release_firmware(rdev->smc_fw);
2214                                 rdev->smc_fw = NULL;
2215                                 err = 0;
2216                         } else if (rdev->smc_fw->size != smc_req_size) {
2217                                 printk(KERN_ERR
2218                                        "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2219                                        rdev->smc_fw->size, fw_name);
2220                                 err = -EINVAL;
2221                         }
2222                 } else {
2223                         err = radeon_ucode_validate(rdev->smc_fw);
2224                         if (err) {
2225                                 printk(KERN_ERR
2226                                        "cik_fw: validation failed for firmware \"%s\"\n",
2227                                        fw_name);
2228                                 goto out;
2229                         } else {
2230                                 new_fw++;
2231                         }
2232                 }
2233         }
2234
2235         if (new_fw == 0) {
2236                 rdev->new_fw = false;
2237         } else if (new_fw < num_fw) {
2238                 printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2239                 err = -EINVAL;
2240         } else {
2241                 rdev->new_fw = true;
2242         }
2243
2244 out:
2245         if (err) {
2246                 if (err != -EINVAL)
2247                         printk(KERN_ERR
2248                                "cik_cp: Failed to load firmware \"%s\"\n",
2249                                fw_name);
2250                 release_firmware(rdev->pfp_fw);
2251                 rdev->pfp_fw = NULL;
2252                 release_firmware(rdev->me_fw);
2253                 rdev->me_fw = NULL;
2254                 release_firmware(rdev->ce_fw);
2255                 rdev->ce_fw = NULL;
2256                 release_firmware(rdev->mec_fw);
2257                 rdev->mec_fw = NULL;
2258                 release_firmware(rdev->mec2_fw);
2259                 rdev->mec2_fw = NULL;
2260                 release_firmware(rdev->rlc_fw);
2261                 rdev->rlc_fw = NULL;
2262                 release_firmware(rdev->sdma_fw);
2263                 rdev->sdma_fw = NULL;
2264                 release_firmware(rdev->mc_fw);
2265                 rdev->mc_fw = NULL;
2266                 release_firmware(rdev->smc_fw);
2267                 rdev->smc_fw = NULL;
2268         }
2269         return err;
2270 }
2271
2272 /*
2273  * Core functions
2274  */
2275 /**
2276  * cik_tiling_mode_table_init - init the hw tiling table
2277  *
2278  * @rdev: radeon_device pointer
2279  *
2280  * Starting with SI, the tiling setup is done globally in a
2281  * set of 32 tiling modes.  Rather than selecting each set of
2282  * parameters per surface as on older asics, we just select
2283  * which index in the tiling table we want to use, and the
2284  * surface uses those parameters (CIK).
2285  */
2286 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2287 {
2288         const u32 num_tile_mode_states = 32;
2289         const u32 num_secondary_tile_mode_states = 16;
2290         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2291         u32 num_pipe_configs;
2292         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2293                 rdev->config.cik.max_shader_engines;
2294
2295         switch (rdev->config.cik.mem_row_size_in_kb) {
2296         case 1:
2297                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2298                 break;
2299         case 2:
2300         default:
2301                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2302                 break;
2303         case 4:
2304                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2305                 break;
2306         }
2307
2308         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2309         if (num_pipe_configs > 8)
2310                 num_pipe_configs = 16;
2311
2312         if (num_pipe_configs == 16) {
2313                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2314                         switch (reg_offset) {
2315                         case 0:
2316                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2317                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2318                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2319                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2320                                 break;
2321                         case 1:
2322                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2323                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2324                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2326                                 break;
2327                         case 2:
2328                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2329                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2330                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2331                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2332                                 break;
2333                         case 3:
2334                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2335                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2336                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2338                                 break;
2339                         case 4:
2340                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2341                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2342                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2343                                                  TILE_SPLIT(split_equal_to_row_size));
2344                                 break;
2345                         case 5:
2346                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2347                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2348                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2349                                 break;
2350                         case 6:
2351                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2352                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2353                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2355                                 break;
2356                         case 7:
2357                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2358                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2359                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2360                                                  TILE_SPLIT(split_equal_to_row_size));
2361                                 break;
2362                         case 8:
2363                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2364                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2365                                 break;
2366                         case 9:
2367                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2368                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2369                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2370                                 break;
2371                         case 10:
2372                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2373                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2374                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2375                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2376                                 break;
2377                         case 11:
2378                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2379                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2380                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2381                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2382                                 break;
2383                         case 12:
2384                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2385                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2386                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2388                                 break;
2389                         case 13:
2390                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2391                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2393                                 break;
2394                         case 14:
2395                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2396                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2397                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2398                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2399                                 break;
2400                         case 16:
2401                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2402                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2403                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2404                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2405                                 break;
2406                         case 17:
2407                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2408                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2409                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2410                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2411                                 break;
2412                         case 27:
2413                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2414                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2416                                 break;
2417                         case 28:
2418                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2419                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2420                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2421                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2422                                 break;
2423                         case 29:
2424                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2425                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2426                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2427                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2428                                 break;
2429                         case 30:
2430                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2431                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2432                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434                                 break;
2435                         default:
2436                                 gb_tile_moden = 0;
2437                                 break;
2438                         }
2439                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2440                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2441                 }
2442                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2443                         switch (reg_offset) {
2444                         case 0:
2445                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2447                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2448                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2449                                 break;
2450                         case 1:
2451                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2452                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2453                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2454                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2455                                 break;
2456                         case 2:
2457                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2460                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2461                                 break;
2462                         case 3:
2463                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2464                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2465                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2466                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2467                                 break;
2468                         case 4:
2469                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2472                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2473                                 break;
2474                         case 5:
2475                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2476                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2477                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2478                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2479                                 break;
2480                         case 6:
2481                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2483                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2484                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2485                                 break;
2486                         case 8:
2487                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2488                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2489                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2490                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2491                                 break;
2492                         case 9:
2493                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2495                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2496                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2497                                 break;
2498                         case 10:
2499                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2500                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2501                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2502                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2503                                 break;
2504                         case 11:
2505                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2506                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2507                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2508                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2509                                 break;
2510                         case 12:
2511                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2512                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2513                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2514                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2515                                 break;
2516                         case 13:
2517                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2518                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2519                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2520                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2521                                 break;
2522                         case 14:
2523                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2525                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2526                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2527                                 break;
2528                         default:
2529                                 gb_tile_moden = 0;
2530                                 break;
2531                         }
2532                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2533                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2534                 }
2535         } else if (num_pipe_configs == 8) {
2536                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2537                         switch (reg_offset) {
2538                         case 0:
2539                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2540                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2541                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2542                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2543                                 break;
2544                         case 1:
2545                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2546                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2547                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2549                                 break;
2550                         case 2:
2551                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2552                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2553                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2555                                 break;
2556                         case 3:
2557                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2558                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2559                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2561                                 break;
2562                         case 4:
2563                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2564                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2565                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2566                                                  TILE_SPLIT(split_equal_to_row_size));
2567                                 break;
2568                         case 5:
2569                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2570                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2572                                 break;
2573                         case 6:
2574                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2575                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2576                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2577                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2578                                 break;
2579                         case 7:
2580                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2581                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2582                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583                                                  TILE_SPLIT(split_equal_to_row_size));
2584                                 break;
2585                         case 8:
2586                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2587                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2588                                 break;
2589                         case 9:
2590                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2591                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2592                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2593                                 break;
2594                         case 10:
2595                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2596                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2597                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2598                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2599                                 break;
2600                         case 11:
2601                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2602                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2603                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2604                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2605                                 break;
2606                         case 12:
2607                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2608                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2609                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2610                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2611                                 break;
2612                         case 13:
2613                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2614                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2615                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2616                                 break;
2617                         case 14:
2618                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2619                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2620                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2621                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2622                                 break;
2623                         case 16:
2624                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2625                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2626                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2627                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2628                                 break;
2629                         case 17:
2630                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2631                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2632                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2633                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2634                                 break;
2635                         case 27:
2636                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2637                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2638                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2639                                 break;
2640                         case 28:
2641                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2642                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2643                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2644                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2645                                 break;
2646                         case 29:
2647                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2648                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2649                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2650                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2651                                 break;
2652                         case 30:
2653                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2654                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2655                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2656                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2657                                 break;
2658                         default:
2659                                 gb_tile_moden = 0;
2660                                 break;
2661                         }
2662                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2663                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2664                 }
2665                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2666                         switch (reg_offset) {
2667                         case 0:
2668                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2669                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2670                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2671                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2672                                 break;
2673                         case 1:
2674                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2675                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2676                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2677                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2678                                 break;
2679                         case 2:
2680                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2681                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2682                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2683                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2684                                 break;
2685                         case 3:
2686                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2687                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2688                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2689                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2690                                 break;
2691                         case 4:
2692                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2693                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2694                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2695                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2696                                 break;
2697                         case 5:
2698                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2699                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2700                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2701                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2702                                 break;
2703                         case 6:
2704                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2705                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2706                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2707                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2708                                 break;
2709                         case 8:
2710                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2711                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2712                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2713                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2714                                 break;
2715                         case 9:
2716                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2717                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2718                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2719                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2720                                 break;
2721                         case 10:
2722                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2723                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2724                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2725                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2726                                 break;
2727                         case 11:
2728                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2729                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2730                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2731                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2732                                 break;
2733                         case 12:
2734                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2735                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2736                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2737                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2738                                 break;
2739                         case 13:
2740                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2741                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2742                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2743                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2744                                 break;
2745                         case 14:
2746                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2747                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2748                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2749                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2750                                 break;
2751                         default:
2752                                 gb_tile_moden = 0;
2753                                 break;
2754                         }
2755                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2756                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2757                 }
2758         } else if (num_pipe_configs == 4) {
2759                 if (num_rbs == 4) {
2760                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
                                     /* GB_TILE_MODE table for num_pipe_configs == 4 with
                                      * num_rbs == 4: pipe config ADDR_SURF_P4_16x16.
                                      * Each entry is cached in rdev->config and written to
                                      * the corresponding GB_TILE_MODE register.
                                      */
2761                                 switch (reg_offset) {
                                     /* modes 0-7: depth micro tiling, tile split 64B up to row size */
2762                                 case 0:
2763                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2764                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2765                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2766                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2767                                         break;
2768                                 case 1:
2769                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2770                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2771                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2772                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2773                                         break;
2774                                 case 2:
2775                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2776                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2777                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2778                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2779                                         break;
2780                                 case 3:
2781                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2782                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2783                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2784                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2785                                         break;
2786                                 case 4:
                                             /* split_equal_to_row_size is computed earlier from the
                                              * memory row size — runtime value, not a constant */
2787                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2788                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2789                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2790                                                          TILE_SPLIT(split_equal_to_row_size));
2791                                         break;
2792                                 case 5:
2793                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2794                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2795                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2796                                         break;
2797                                 case 6:
2798                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2799                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2800                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2802                                         break;
2803                                 case 7:
2804                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2805                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2806                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2807                                                          TILE_SPLIT(split_equal_to_row_size));
2808                                         break;
                                     /* mode 8: linear aligned; modes 9-12: display micro tiling */
2809                                 case 8:
2810                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2811                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2812                                         break;
2813                                 case 9:
2814                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2815                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2816                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2817                                         break;
2818                                 case 10:
2819                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2820                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2821                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2822                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2823                                         break;
2824                                 case 11:
                                             /* NOTE(review): PRT mode uses P4_8x16 here while the
                                              * non-PRT modes use P4_16x16 — matches the pattern of
                                              * the other PRT_TILED_THIN1 entries below */
2825                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2826                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2827                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2828                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2829                                         break;
2830                                 case 12:
2831                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2832                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2833                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2834                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2835                                         break;
                                     /* modes 13-17: thin micro tiling (no case 15) */
2836                                 case 13:
2837                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2838                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2839                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2840                                         break;
2841                                 case 14:
2842                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2843                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2844                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2845                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2846                                         break;
2847                                 case 16:
2848                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2849                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2850                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2851                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2852                                         break;
2853                                 case 17:
2854                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2855                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2856                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2857                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2858                                         break;
                                     /* modes 27-30: rotated micro tiling */
2859                                 case 27:
2860                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2861                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2862                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2863                                         break;
2864                                 case 28:
2865                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2866                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2867                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2868                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2869                                         break;
2870                                 case 29:
2871                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2872                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2873                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2874                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2875                                         break;
2876                                 case 30:
2877                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2878                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2879                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2880                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2881                                         break;
                                     /* remaining indices (15, 18-26, ...) are programmed as 0 */
2882                                 default:
2883                                         gb_tile_moden = 0;
2884                                         break;
2885                                 }
                                     /* stash in driver state, then write the hw register */
2886                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2887                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2888                         }
2889                 } else if (num_rbs < 4) {
2890                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
                                     /* GB_TILE_MODE table for num_pipe_configs == 4 with
                                      * num_rbs < 4: same layout as the 4-RB table above but
                                      * every entry uses pipe config ADDR_SURF_P4_8x16.
                                      */
2891                                 switch (reg_offset) {
                                     /* modes 0-7: depth micro tiling, tile split 64B up to row size */
2892                                 case 0:
2893                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2894                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2895                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2896                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2897                                         break;
2898                                 case 1:
2899                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2900                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2901                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2902                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2903                                         break;
2904                                 case 2:
2905                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2906                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2907                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2908                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2909                                         break;
2910                                 case 3:
2911                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2912                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2913                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2914                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2915                                         break;
2916                                 case 4:
                                             /* split_equal_to_row_size is a runtime value derived
                                              * from the memory row size earlier in this function */
2917                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2919                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2920                                                          TILE_SPLIT(split_equal_to_row_size));
2921                                         break;
2922                                 case 5:
2923                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2924                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2925                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2926                                         break;
2927                                 case 6:
2928                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2929                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2930                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2931                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2932                                         break;
2933                                 case 7:
2934                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2935                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2936                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2937                                                          TILE_SPLIT(split_equal_to_row_size));
2938                                         break;
                                     /* mode 8: linear aligned; modes 9-12: display micro tiling */
2939                                 case 8:
2940                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2941                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16));
2942                                         break;
2943                                 case 9:
2944                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2945                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2946                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2947                                         break;
2948                                 case 10:
2949                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2950                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2951                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2952                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2953                                         break;
2954                                 case 11:
2955                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2956                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2957                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2958                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959                                         break;
2960                                 case 12:
2961                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2962                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2963                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2964                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2965                                         break;
                                     /* modes 13-17: thin micro tiling (no case 15) */
2966                                 case 13:
2967                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2968                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2969                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2970                                         break;
2971                                 case 14:
2972                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2973                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2974                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2975                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2976                                         break;
2977                                 case 16:
2978                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2979                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2980                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2981                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2982                                         break;
2983                                 case 17:
2984                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2985                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2986                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2987                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2988                                         break;
                                     /* modes 27-30: rotated micro tiling */
2989                                 case 27:
2990                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2991                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2992                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2993                                         break;
2994                                 case 28:
2995                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2996                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2997                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2998                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2999                                         break;
3000                                 case 29:
3001                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3002                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3003                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3004                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3005                                         break;
3006                                 case 30:
3007                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3008                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3009                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3010                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3011                                         break;
                                     /* remaining indices (15, 18-26, ...) are programmed as 0 */
3012                                 default:
3013                                         gb_tile_moden = 0;
3014                                         break;
3015                                 }
                                     /* stash in driver state, then write the hw register */
3016                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3017                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3018                         }
3019                 }
3020                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
                             /* GB_MACROTILE_MODE (bank) table for the 4-pipe-config branch;
                              * shared by both RB counts above. Each entry encodes bank
                              * width/height, macro tile aspect and the number of banks.
                              */
3021                         switch (reg_offset) {
3022                         case 0:
3023                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3024                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3025                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3026                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3027                                 break;
3028                         case 1:
3029                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3030                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3031                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3032                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3033                                 break;
3034                         case 2:
3035                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3036                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3037                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3038                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3039                                 break;
3040                         case 3:
3041                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3042                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3043                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3044                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3045                                 break;
3046                         case 4:
3047                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3048                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3049                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3050                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3051                                 break;
3052                         case 5:
3053                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3054                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3055                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3056                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3057                                 break;
3058                         case 6:
3059                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3060                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3061                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3062                                                  NUM_BANKS(ADDR_SURF_4_BANK));
3063                                 break;
                             /* no case 7: that index takes the default (0) below */
3064                         case 8:
3065                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3066                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3067                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3068                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3069                                 break;
3070                         case 9:
3071                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3072                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3073                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3074                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3075                                 break;
3076                         case 10:
3077                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3078                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3079                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3080                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3081                                 break;
3082                         case 11:
3083                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3084                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3085                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3086                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3087                                 break;
3088                         case 12:
3089                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3090                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3091                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3092                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3093                                 break;
3094                         case 13:
3095                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3096                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3097                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3098                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3099                                 break;
3100                         case 14:
3101                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3102                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3103                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3104                                                  NUM_BANKS(ADDR_SURF_4_BANK));
3105                                 break;
                             /* unhandled indices (7 and any past 14) are programmed as 0 */
3106                         default:
3107                                 gb_tile_moden = 0;
3108                                 break;
3109                         }
                             /* stash in driver state, then write the hw register */
3110                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3111                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3112                 }
3113         } else if (num_pipe_configs == 2) {
3114                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3115                         switch (reg_offset) {
3116                         case 0:
3117                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3118                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3119                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3120                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3121                                 break;
3122                         case 1:
3123                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3124                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3125                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3126                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3127                                 break;
3128                         case 2:
3129                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3130                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3131                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3132                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3133                                 break;
3134                         case 3:
3135                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3136                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3137                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3138                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3139                                 break;
3140                         case 4:
3141                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3142                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3143                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3144                                                  TILE_SPLIT(split_equal_to_row_size));
3145                                 break;
3146                         case 5:
3147                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3148                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3149                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3150                                 break;
3151                         case 6:
3152                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3153                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3154                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3155                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3156                                 break;
3157                         case 7:
3158                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3159                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3160                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3161                                                  TILE_SPLIT(split_equal_to_row_size));
3162                                 break;
3163                         case 8:
3164                                 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3165                                                 PIPE_CONFIG(ADDR_SURF_P2);
3166                                 break;
3167                         case 9:
3168                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3169                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3170                                                  PIPE_CONFIG(ADDR_SURF_P2));
3171                                 break;
3172                         case 10:
3173                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3174                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3175                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3176                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3177                                 break;
3178                         case 11:
3179                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3180                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3181                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3182                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3183                                 break;
3184                         case 12:
3185                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3186                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3187                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3188                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3189                                 break;
3190                         case 13:
3191                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3192                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3193                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3194                                 break;
3195                         case 14:
3196                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3197                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3198                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3199                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3200                                 break;
3201                         case 16:
3202                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3203                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3204                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3205                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3206                                 break;
3207                         case 17:
3208                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3209                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3210                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3211                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3212                                 break;
3213                         case 27:
3214                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3215                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3216                                                  PIPE_CONFIG(ADDR_SURF_P2));
3217                                 break;
3218                         case 28:
3219                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3220                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3221                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3222                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3223                                 break;
3224                         case 29:
3225                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3226                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3227                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3228                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3229                                 break;
3230                         case 30:
3231                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3232                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3233                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3234                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3235                                 break;
3236                         default:
3237                                 gb_tile_moden = 0;
3238                                 break;
3239                         }
3240                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3241                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3242                 }
3243                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3244                         switch (reg_offset) {
3245                         case 0:
3246                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3247                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3248                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3249                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3250                                 break;
3251                         case 1:
3252                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3253                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3254                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3255                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3256                                 break;
3257                         case 2:
3258                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3259                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3260                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3261                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3262                                 break;
3263                         case 3:
3264                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3265                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3266                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3267                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3268                                 break;
3269                         case 4:
3270                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3271                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3272                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3273                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3274                                 break;
3275                         case 5:
3276                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3277                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3278                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3279                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3280                                 break;
3281                         case 6:
3282                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3283                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3284                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3285                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3286                                 break;
3287                         case 8:
3288                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3289                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3290                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3291                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3292                                 break;
3293                         case 9:
3294                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3295                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3296                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3297                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3298                                 break;
3299                         case 10:
3300                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3301                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3302                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3303                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3304                                 break;
3305                         case 11:
3306                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3307                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3308                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3309                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3310                                 break;
3311                         case 12:
3312                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3313                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3314                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3315                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3316                                 break;
3317                         case 13:
3318                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3319                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3320                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3321                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3322                                 break;
3323                         case 14:
3324                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3325                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3326                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3327                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3328                                 break;
3329                         default:
3330                                 gb_tile_moden = 0;
3331                                 break;
3332                         }
3333                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3334                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3335                 }
3336         } else
3337                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3338 }
3339
3340 /**
3341  * cik_select_se_sh - select which SE, SH to address
3342  *
3343  * @rdev: radeon_device pointer
3344  * @se_num: shader engine to address
3345  * @sh_num: sh block to address
3346  *
3347  * Select which SE, SH combinations to address. Certain
3348  * registers are instanced per SE or SH.  0xffffffff means
3349  * broadcast to all SEs or SHs (CIK).
3350  */
3351 static void cik_select_se_sh(struct radeon_device *rdev,
3352                              u32 se_num, u32 sh_num)
3353 {
3354         u32 data = INSTANCE_BROADCAST_WRITES;
3355
3356         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3357                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3358         else if (se_num == 0xffffffff)
3359                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3360         else if (sh_num == 0xffffffff)
3361                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3362         else
3363                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3364         WREG32(GRBM_GFX_INDEX, data);
3365 }
3366
3367 /**
3368  * cik_create_bitmask - create a bitmask
3369  *
3370  * @bit_width: length of the mask
3371  *
3372  * create a variable length bit mask (CIK).
3373  * Returns the bitmask.
3374  */
3375 static u32 cik_create_bitmask(u32 bit_width)
3376 {
3377         u32 i, mask = 0;
3378
3379         for (i = 0; i < bit_width; i++) {
3380                 mask <<= 1;
3381                 mask |= 1;
3382         }
3383         return mask;
3384 }
3385
3386 /**
3387  * cik_get_rb_disabled - computes the mask of disabled RBs
3388  *
3389  * @rdev: radeon_device pointer
3390  * @max_rb_num: max RBs (render backends) for the asic
3391  * @se_num: number of SEs (shader engines) for the asic
3392  * @sh_per_se: number of SH blocks per SE for the asic
3393  *
3394  * Calculates the bitmask of disabled RBs (CIK).
3395  * Returns the disabled RB bitmask.
3396  */
3397 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3398                               u32 max_rb_num_per_se,
3399                               u32 sh_per_se)
3400 {
3401         u32 data, mask;
3402
3403         data = RREG32(CC_RB_BACKEND_DISABLE);
3404         if (data & 1)
3405                 data &= BACKEND_DISABLE_MASK;
3406         else
3407                 data = 0;
3408         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3409
3410         data >>= BACKEND_DISABLE_SHIFT;
3411
3412         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3413
3414         return data & mask;
3415 }
3416
3417 /**
3418  * cik_setup_rb - setup the RBs on the asic
3419  *
3420  * @rdev: radeon_device pointer
3421  * @se_num: number of SEs (shader engines) for the asic
3422  * @sh_per_se: number of SH blocks per SE for the asic
3423  * @max_rb_num: max RBs (render backends) for the asic
3424  *
3425  * Configures per-SE/SH RB registers (CIK).
3426  */
3427 static void cik_setup_rb(struct radeon_device *rdev,
3428                          u32 se_num, u32 sh_per_se,
3429                          u32 max_rb_num_per_se)
3430 {
3431         int i, j;
3432         u32 data, mask;
3433         u32 disabled_rbs = 0;
3434         u32 enabled_rbs = 0;
3435
3436         mutex_lock(&rdev->grbm_idx_mutex);
3437         for (i = 0; i < se_num; i++) {
3438                 for (j = 0; j < sh_per_se; j++) {
3439                         cik_select_se_sh(rdev, i, j);
3440                         data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3441                         if (rdev->family == CHIP_HAWAII)
3442                                 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3443                         else
3444                                 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3445                 }
3446         }
3447         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3448         mutex_unlock(&rdev->grbm_idx_mutex);
3449
3450         mask = 1;
3451         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3452                 if (!(disabled_rbs & mask))
3453                         enabled_rbs |= mask;
3454                 mask <<= 1;
3455         }
3456
3457         rdev->config.cik.backend_enable_mask = enabled_rbs;
3458
3459         mutex_lock(&rdev->grbm_idx_mutex);
3460         for (i = 0; i < se_num; i++) {
3461                 cik_select_se_sh(rdev, i, 0xffffffff);
3462                 data = 0;
3463                 for (j = 0; j < sh_per_se; j++) {
3464                         switch (enabled_rbs & 3) {
3465                         case 0:
3466                                 if (j == 0)
3467                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3468                                 else
3469                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3470                                 break;
3471                         case 1:
3472                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3473                                 break;
3474                         case 2:
3475                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3476                                 break;
3477                         case 3:
3478                         default:
3479                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3480                                 break;
3481                         }
3482                         enabled_rbs >>= 2;
3483                 }
3484                 WREG32(PA_SC_RASTER_CONFIG, data);
3485         }
3486         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3487         mutex_unlock(&rdev->grbm_idx_mutex);
3488 }
3489
3490 /**
3491  * cik_gpu_init - setup the 3D engine
3492  *
3493  * @rdev: radeon_device pointer
3494  *
3495  * Configures the 3D engine and tiling configuration
3496  * registers so that the 3D engine is usable.
3497  */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* Per-ASIC shader engine/pipe/cache limits and the "golden"
	 * GB_ADDR_CONFIG baseline used for the tiling setup below.
	 */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAWAII:
		rdev->config.cik.max_shader_engines = 4;
		rdev->config.cik.max_tile_pipes = 16;
		rdev->config.cik.max_cu_per_sh = 11;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 4;
		rdev->config.cik.max_texture_channel_caches = 16;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 4;
		/* Kaveri SKUs differ in CU and RB counts; the PCI device id
		 * selects the right combination (8/2, 6/2, 4/1 or 3/1).
		 */
		if ((rdev->pdev->device == 0x1304) ||
		    (rdev->pdev->device == 0x1305) ||
		    (rdev->pdev->device == 0x130C) ||
		    (rdev->pdev->device == 0x130F) ||
		    (rdev->pdev->device == 0x1310) ||
		    (rdev->pdev->device == 0x1311) ||
		    (rdev->pdev->device == 0x131C)) {
			rdev->config.cik.max_cu_per_sh = 8;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1309) ||
			   (rdev->pdev->device == 0x130A) ||
			   (rdev->pdev->device == 0x130D) ||
			   (rdev->pdev->device == 0x1313) ||
			   (rdev->pdev->device == 0x131D)) {
			rdev->config.cik.max_cu_per_sh = 6;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1306) ||
			   (rdev->pdev->device == 0x1307) ||
			   (rdev->pdev->device == 0x130B) ||
			   (rdev->pdev->device == 0x130E) ||
			   (rdev->pdev->device == 0x1315) ||
			   (rdev->pdev->device == 0x1318) ||
			   (rdev->pdev->device == 0x131B)) {
			rdev->config.cik.max_cu_per_sh = 4;
			rdev->config.cik.max_backends_per_se = 1;
		} else {
			rdev->config.cik.max_cu_per_sh = 3;
			rdev->config.cik.max_backends_per_se = 1;
		}
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
	WREG32(SRBM_INT_CNTL, 0x1);
	WREG32(SRBM_INT_ACK, 0x1);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read here but never used below -
	 * candidate for removal.
	 */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	/* Derive the DRAM row size from the column count, capped at 4KB. */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	rdev->config.cik.tile_config |=
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* Broadcast the address config to every block that consumes it
	 * (HDP, display DMA, both SDMA engines, UVD).
	 */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* Count the active CUs across all SE/SH pairs. */
	rdev->config.cik.active_cus = 0;
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			rdev->config.cik.active_cus +=
				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	mutex_lock(&rdev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	/* Scan-converter FIFO sizes come from the per-ASIC config above. */
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* Read-modify-write of HDP_HOST_PATH_CNTL (written back unchanged). */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
	mutex_unlock(&rdev->grbm_idx_mutex);

	udelay(50);
}
3772
3773 /*
 * GPU scratch registers helper functions.
3775  */
3776 /**
3777  * cik_scratch_init - setup driver info for CP scratch regs
3778  *
3779  * @rdev: radeon_device pointer
3780  *
3781  * Set up the number and offset of the CP scratch registers.
3782  * NOTE: use of CP scratch registers is a legacy inferface and
3783  * is not used by default on newer asics (r6xx+).  On newer asics,
3784  * memory buffers are used for fences rather than scratch regs.
3785  */
3786 static void cik_scratch_init(struct radeon_device *rdev)
3787 {
3788         int i;
3789
3790         rdev->scratch.num_reg = 7;
3791         rdev->scratch.reg_base = SCRATCH_REG0;
3792         for (i = 0; i < rdev->scratch.num_reg; i++) {
3793                 rdev->scratch.free[i] = true;
3794                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3795         }
3796 }
3797
3798 /**
3799  * cik_ring_test - basic gfx ring test
3800  *
3801  * @rdev: radeon_device pointer
3802  * @ring: radeon_ring structure holding ring information
3803  *
3804  * Allocate a scratch register and write to it using the gfx ring (CIK).
3805  * Provides a basic gfx ring test to verify that the ring is working.
3806  * Used by cik_cp_gfx_resume();
3807  * Returns 0 on success, error on failure.
3808  */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* Seed the scratch reg so we can tell whether the CP wrote it. */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* Emit a 3-dword packet asking the CP to write 0xDEADBEEF into the
	 * scratch register.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring, false);

	/* Poll until the CP lands the write or usec_timeout expires. */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}
3849
/**
 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
 *
 * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 *
 * Emits an hdp flush on the cp.
 */
static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
				       int ridx)
{
	struct radeon_ring *ring = &rdev->ring[ridx];
	u32 ref_and_mask;

	/* Pick the GPU_HDP_FLUSH_* bit for this ring.  The compute cases
	 * and "default" are deliberately grouped so any unrecognized ring
	 * index is treated like a compute ring.
	 */
	switch (ring->idx) {
	case CAYMAN_RING_TYPE_CP1_INDEX:
	case CAYMAN_RING_TYPE_CP2_INDEX:
	default:
		switch (ring->me) {
		case 0:
			ref_and_mask = CP2 << ring->pipe;
			break;
		case 1:
			ref_and_mask = CP6 << ring->pipe;
			break;
		default:
			/* no flush bit for other ME values; skip the flush */
			return;
		}
		break;
	case RADEON_RING_TYPE_GFX_INDEX:
		ref_and_mask = CP0;
		break;
	}

	/* request the flush, then poll until the done bit matches the mask */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, 0x20); /* poll interval */
}
3894
/**
 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address of this ring's fence sequence slot */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* dummy pass: write data (DATA_SEL(1)) but raise no interrupt
	 * (INT_SEL(0)) */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	radeon_ring_write(ring, fence->seq - 1);
	radeon_ring_write(ring, 0);

	/* Then send the real EOP event down the pipe. */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* real pass: write the fence seq and send an interrupt (INT_SEL(2)) */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3935
/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address of this ring's fence sequence slot */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* write data and raise an interrupt when the event completes */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3963
3964 /**
3965  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3966  *
3967  * @rdev: radeon_device pointer
3968  * @ring: radeon ring buffer object
3969  * @semaphore: radeon semaphore object
3970  * @emit_wait: Is this a sempahore wait?
3971  *
3972  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3973  * from running ahead of semaphore waits.
3974  */
3975 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3976                              struct radeon_ring *ring,
3977                              struct radeon_semaphore *semaphore,
3978                              bool emit_wait)
3979 {
3980         uint64_t addr = semaphore->gpu_addr;
3981         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3982
3983         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3984         radeon_ring_write(ring, lower_32_bits(addr));
3985         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3986
3987         if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3988                 /* Prevent the PFP from running ahead of the semaphore wait */
3989                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3990                 radeon_ring_write(ring, 0x0);
3991         }
3992
3993         return true;
3994 }
3995
/**
 * cik_copy_cpdma - copy pages using the CP DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @resv: reservation object to sync to
 *
 * Copy GPU paging using the CP DMA engine (CIK+).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 * Returns the fence for the copy, or an ERR_PTR on failure.
 */
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
				    uint64_t src_offset, uint64_t dst_offset,
				    unsigned num_gpu_pages,
				    struct reservation_object *resv)
{
	struct radeon_fence *fence;
	struct radeon_sync sync;
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;
	int i, num_loops;
	int r = 0;

	radeon_sync_create(&sync);

	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	/* each DMA_DATA packet can move at most 0x1fffff bytes */
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	/* 7 dwords per copy packet, plus 18 for the sync and fence packets */
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	/* wait for fences attached to the reservation object and other rings */
	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		control = 0;
		/* only the final packet needs to synchronize with the CP */
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC;
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, &fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_sync_free(rdev, &sync, fence);

	return fence;
}
4067
4068 /*
4069  * IB stuff
4070  */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for the SET_UCONFIG_REG write below plus
			 * 4 dwords for the IB packet emitted at the end */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for the WRITE_DATA packet below plus
			 * 4 dwords for the IB packet emitted at the end */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB length in dwords, with the VM id in bits 31:24 */
	control |= ib->length_dw | (vm_id << 24);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
4126
/**
 * cik_ib_test - basic gfx ring IB test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate an IB and execute it on the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that IBs are working.
 * Returns 0 on success, error on failure.
 */
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed the scratch register; the IB below should overwrite it */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* 3-dword IB: write 0xDEADBEEF to the scratch register */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;
	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	/* wait for the IB's fence before polling the scratch register */
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return r;
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
	return r;
}
4192
/*
 * CP.
 * On CIK, gfx and compute now have independent command processors.
 *
 * GFX
 * Gfx consists of a single ring and can process both gfx jobs and
 * compute jobs.  The gfx CP consists of three microengines (ME):
 * PFP - Pre-Fetch Parser
 * ME - Micro Engine
 * CE - Constant Engine
 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
 * used by the DE so that they can be loaded into cache in parallel
 * while the DE is processing state update packets.
 *
 * Compute
 * The compute CP consists of two microengines (ME):
 * MEC1 - Compute MicroEngine 1
 * MEC2 - Compute MicroEngine 2
 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
 * The queues are exposed to userspace and are programmed directly
 * by the compute runtime.
 */
4216 /**
4217  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4218  *
4219  * @rdev: radeon_device pointer
4220  * @enable: enable or disable the MEs
4221  *
4222  * Halts or unhalts the gfx MEs.
4223  */
4224 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4225 {
4226         if (enable)
4227                 WREG32(CP_ME_CNTL, 0);
4228         else {
4229                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4230                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4231                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4232                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4233         }
4234         udelay(50);
4235 }
4236
4237 /**
4238  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4239  *
4240  * @rdev: radeon_device pointer
4241  *
4242  * Loads the gfx PFP, ME, and CE ucode.
4243  * Returns 0 for success, -EINVAL if the ucode is not available.
4244  */
4245 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4246 {
4247         int i;
4248
4249         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4250                 return -EINVAL;
4251
4252         cik_cp_gfx_enable(rdev, false);
4253
4254         if (rdev->new_fw) {
4255                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
4256                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4257                 const struct gfx_firmware_header_v1_0 *ce_hdr =
4258                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4259                 const struct gfx_firmware_header_v1_0 *me_hdr =
4260                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4261                 const __le32 *fw_data;
4262                 u32 fw_size;
4263
4264                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4265                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4266                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
4267
4268                 /* PFP */
4269                 fw_data = (const __le32 *)
4270                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4271                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4272                 WREG32(CP_PFP_UCODE_ADDR, 0);
4273                 for (i = 0; i < fw_size; i++)
4274                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4275                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4276
4277                 /* CE */
4278                 fw_data = (const __le32 *)
4279                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4280                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4281                 WREG32(CP_CE_UCODE_ADDR, 0);
4282                 for (i = 0; i < fw_size; i++)
4283                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4284                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4285
4286                 /* ME */
4287                 fw_data = (const __be32 *)
4288                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4289                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4290                 WREG32(CP_ME_RAM_WADDR, 0);
4291                 for (i = 0; i < fw_size; i++)
4292                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4293                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4294                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4295         } else {
4296                 const __be32 *fw_data;
4297
4298                 /* PFP */
4299                 fw_data = (const __be32 *)rdev->pfp_fw->data;
4300                 WREG32(CP_PFP_UCODE_ADDR, 0);
4301                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4302                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4303                 WREG32(CP_PFP_UCODE_ADDR, 0);
4304
4305                 /* CE */
4306                 fw_data = (const __be32 *)rdev->ce_fw->data;
4307                 WREG32(CP_CE_UCODE_ADDR, 0);
4308                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4309                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4310                 WREG32(CP_CE_UCODE_ADDR, 0);
4311
4312                 /* ME */
4313                 fw_data = (const __be32 *)rdev->me_fw->data;
4314                 WREG32(CP_ME_RAM_WADDR, 0);
4315                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4316                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4317                 WREG32(CP_ME_RAM_WADDR, 0);
4318         }
4319
4320         return 0;
4321 }
4322
/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* reserve room for the default state plus 17 dwords of setup packets */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream the canned CIK default register state */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}
4383
4384 /**
4385  * cik_cp_gfx_fini - stop the gfx ring
4386  *
4387  * @rdev: radeon_device pointer
4388  *
4389  * Stop the gfx ring and tear down the driver ring
4390  * info.
4391  */
4392 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4393 {
4394         cik_cp_gfx_enable(rdev, false);
4395         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4396 }
4397
/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	/* register not programmed on Hawaii */
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	/* without writeback, the rptr shadow is never updated */
	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	/* program the ring buffer base (256-byte aligned) */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4473
4474 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4475                      struct radeon_ring *ring)
4476 {
4477         u32 rptr;
4478
4479         if (rdev->wb.enabled)
4480                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4481         else
4482                 rptr = RREG32(CP_RB0_RPTR);
4483
4484         return rptr;
4485 }
4486
4487 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4488                      struct radeon_ring *ring)
4489 {
4490         u32 wptr;
4491
4492         wptr = RREG32(CP_RB0_WPTR);
4493
4494         return wptr;
4495 }
4496
4497 void cik_gfx_set_wptr(struct radeon_device *rdev,
4498                       struct radeon_ring *ring)
4499 {
4500         WREG32(CP_RB0_WPTR, ring->wptr);
4501         (void)RREG32(CP_RB0_WPTR);
4502 }
4503
4504 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4505                          struct radeon_ring *ring)
4506 {
4507         u32 rptr;
4508
4509         if (rdev->wb.enabled) {
4510                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4511         } else {
4512                 mutex_lock(&rdev->srbm_mutex);
4513                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4514                 rptr = RREG32(CP_HQD_PQ_RPTR);
4515                 cik_srbm_select(rdev, 0, 0, 0, 0);
4516                 mutex_unlock(&rdev->srbm_mutex);
4517         }
4518
4519         return rptr;
4520 }
4521
4522 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4523                          struct radeon_ring *ring)
4524 {
4525         u32 wptr;
4526
4527         if (rdev->wb.enabled) {
4528                 /* XXX check if swapping is necessary on BE */
4529                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4530         } else {
4531                 mutex_lock(&rdev->srbm_mutex);
4532                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4533                 wptr = RREG32(CP_HQD_PQ_WPTR);
4534                 cik_srbm_select(rdev, 0, 0, 0, 0);
4535                 mutex_unlock(&rdev->srbm_mutex);
4536         }
4537
4538         return wptr;
4539 }
4540
4541 void cik_compute_set_wptr(struct radeon_device *rdev,
4542                           struct radeon_ring *ring)
4543 {
4544         /* XXX check if swapping is necessary on BE */
4545         rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4546         WDOORBELL32(ring->doorbell_index, ring->wptr);
4547 }
4548
4549 /**
4550  * cik_cp_compute_enable - enable/disable the compute CP MEs
4551  *
4552  * @rdev: radeon_device pointer
4553  * @enable: enable or disable the MEs
4554  *
4555  * Halts or unhalts the compute MEs.
4556  */
4557 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4558 {
4559         if (enable)
4560                 WREG32(CP_MEC_CNTL, 0);
4561         else {
4562                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4563                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4564                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4565         }
4566         udelay(50);
4567 }
4568
/**
 * cik_cp_compute_load_microcode - load the compute CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the compute MEC1&2 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* halt the compute MEs before touching their ucode RAM */
	cik_cp_compute_enable(rdev, false);

	if (rdev->new_fw) {
		/* new-style firmware: little-endian payload behind a header */
		const struct gfx_firmware_header_v1_0 *mec_hdr =
			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
		const __le32 *fw_data;
		u32 fw_size;

		radeon_ucode_print_gfx_hdr(&mec_hdr->header);

		/* MEC1 */
		fw_data = (const __le32 *)
			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));

		/* MEC2 */
		/* only Kaveri has a second MEC; new firmware provides a
		 * separate mec2 image for it */
		if (rdev->family == CHIP_KAVERI) {
			const struct gfx_firmware_header_v1_0 *mec2_hdr =
				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;

			fw_data = (const __le32 *)
				(rdev->mec2_fw->data +
				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < fw_size; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
		}
	} else {
		/* legacy firmware: headerless big-endian payload of fixed size */
		const __be32 *fw_data;

		/* MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

		if (rdev->family == CHIP_KAVERI) {
			/* MEC2 */
			/* NOTE(review): MEC2 is loaded from rdev->mec_fw here,
			 * not mec2_fw — presumably legacy firmware ships a
			 * single MEC image shared by both MECs; confirm. */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		}
	}

	return 0;
}
4639
4640 /**
4641  * cik_cp_compute_start - start the compute queues
4642  *
4643  * @rdev: radeon_device pointer
4644  *
4645  * Enable the compute queues.
4646  * Returns 0 for success, error for failure.
4647  */
4648 static int cik_cp_compute_start(struct radeon_device *rdev)
4649 {
4650         cik_cp_compute_enable(rdev, true);
4651
4652         return 0;
4653 }
4654
4655 /**
4656  * cik_cp_compute_fini - stop the compute queues
4657  *
4658  * @rdev: radeon_device pointer
4659  *
4660  * Stop the compute queues and tear down the driver queue
4661  * info.
4662  */
4663 static void cik_cp_compute_fini(struct radeon_device *rdev)
4664 {
4665         int i, idx, r;
4666
4667         cik_cp_compute_enable(rdev, false);
4668
4669         for (i = 0; i < 2; i++) {
4670                 if (i == 0)
4671                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4672                 else
4673                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4674
4675                 if (rdev->ring[idx].mqd_obj) {
4676                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4677                         if (unlikely(r != 0))
4678                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4679
4680                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4681                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4682
4683                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4684                         rdev->ring[idx].mqd_obj = NULL;
4685                 }
4686         }
4687 }
4688
4689 static void cik_mec_fini(struct radeon_device *rdev)
4690 {
4691         int r;
4692
4693         if (rdev->mec.hpd_eop_obj) {
4694                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4695                 if (unlikely(r != 0))
4696                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4697                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4698                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4699
4700                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4701                 rdev->mec.hpd_eop_obj = NULL;
4702         }
4703 }
4704
4705 #define MEC_HPD_SIZE 2048
4706
4707 static int cik_mec_init(struct radeon_device *rdev)
4708 {
4709         int r;
4710         u32 *hpd;
4711
4712         /*
4713          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4714          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4715          * Nonetheless, we assign only 1 pipe because all other pipes will
4716          * be handled by KFD
4717          */
4718         rdev->mec.num_mec = 1;
4719         rdev->mec.num_pipe = 1;
4720         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4721
4722         if (rdev->mec.hpd_eop_obj == NULL) {
4723                 r = radeon_bo_create(rdev,
4724                                      rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4725                                      PAGE_SIZE, true,
4726                                      RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4727                                      &rdev->mec.hpd_eop_obj);
4728                 if (r) {
4729                         dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4730                         return r;
4731                 }
4732         }
4733
4734         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4735         if (unlikely(r != 0)) {
4736                 cik_mec_fini(rdev);
4737                 return r;
4738         }
4739         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4740                           &rdev->mec.hpd_eop_gpu_addr);
4741         if (r) {
4742                 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4743                 cik_mec_fini(rdev);
4744                 return r;
4745         }
4746         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4747         if (r) {
4748                 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4749                 cik_mec_fini(rdev);
4750                 return r;
4751         }
4752
4753         /* clear memory.  Not sure if this is required or not */
4754         memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4755
4756         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4757         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4758
4759         return 0;
4760 }
4761
/*
 * Shadow copy of the CP HQD (hardware queue descriptor) registers for a
 * single compute queue.  This struct is embedded in struct bonaire_mqd,
 * which is written into a GPU-visible (GTT) buffer in
 * cik_cp_compute_resume(), so the field order/layout must not change.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4800
/*
 * Memory queue descriptor (MQD) for Bonaire-class compute queues.
 * One MQD per compute ring is allocated in a GTT buffer object and
 * filled in by cik_cp_compute_resume(); the hardware queue state lives
 * in the embedded queue_state.  Layout is consumed by the GPU, so do
 * not reorder or resize fields.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;	/* HQD register shadow */
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4828
4829 /**
4830  * cik_cp_compute_resume - setup the compute queue registers
4831  *
4832  * @rdev: radeon_device pointer
4833  *
4834  * Program the compute queues and test them to make sure they
4835  * are working.
4836  * Returns 0 for success, error for failure.
4837  */
4838 static int cik_cp_compute_resume(struct radeon_device *rdev)
4839 {
4840         int r, i, j, idx;
4841         u32 tmp;
4842         bool use_doorbell = true;
4843         u64 hqd_gpu_addr;
4844         u64 mqd_gpu_addr;
4845         u64 eop_gpu_addr;
4846         u64 wb_gpu_addr;
4847         u32 *buf;
4848         struct bonaire_mqd *mqd;
4849
4850         r = cik_cp_compute_start(rdev);
4851         if (r)
4852                 return r;
4853
4854         /* fix up chicken bits */
4855         tmp = RREG32(CP_CPF_DEBUG);
4856         tmp |= (1 << 23);
4857         WREG32(CP_CPF_DEBUG, tmp);
4858
4859         /* init the pipes */
4860         mutex_lock(&rdev->srbm_mutex);
4861
4862         eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4863
4864         cik_srbm_select(rdev, 0, 0, 0, 0);
4865
4866         /* write the EOP addr */
4867         WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4868         WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4869
4870         /* set the VMID assigned */
4871         WREG32(CP_HPD_EOP_VMID, 0);
4872
4873         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4874         tmp = RREG32(CP_HPD_EOP_CONTROL);
4875         tmp &= ~EOP_SIZE_MASK;
4876         tmp |= order_base_2(MEC_HPD_SIZE / 8);
4877         WREG32(CP_HPD_EOP_CONTROL, tmp);
4878
4879         mutex_unlock(&rdev->srbm_mutex);
4880
4881         /* init the queues.  Just two for now. */
4882         for (i = 0; i < 2; i++) {
4883                 if (i == 0)
4884                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4885                 else
4886                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4887
4888                 if (rdev->ring[idx].mqd_obj == NULL) {
4889                         r = radeon_bo_create(rdev,
4890                                              sizeof(struct bonaire_mqd),
4891                                              PAGE_SIZE, true,
4892                                              RADEON_GEM_DOMAIN_GTT, 0, NULL,
4893                                              NULL, &rdev->ring[idx].mqd_obj);
4894                         if (r) {
4895                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4896                                 return r;
4897                         }
4898                 }
4899
4900                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4901                 if (unlikely(r != 0)) {
4902                         cik_cp_compute_fini(rdev);
4903                         return r;
4904                 }
4905                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4906                                   &mqd_gpu_addr);
4907                 if (r) {
4908                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4909                         cik_cp_compute_fini(rdev);
4910                         return r;
4911                 }
4912                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4913                 if (r) {
4914                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4915                         cik_cp_compute_fini(rdev);
4916                         return r;
4917                 }
4918
4919                 /* init the mqd struct */
4920                 memset(buf, 0, sizeof(struct bonaire_mqd));
4921
4922                 mqd = (struct bonaire_mqd *)buf;
4923                 mqd->header = 0xC0310800;
4924                 mqd->static_thread_mgmt01[0] = 0xffffffff;
4925                 mqd->static_thread_mgmt01[1] = 0xffffffff;
4926                 mqd->static_thread_mgmt23[0] = 0xffffffff;
4927                 mqd->static_thread_mgmt23[1] = 0xffffffff;
4928
4929                 mutex_lock(&rdev->srbm_mutex);
4930                 cik_srbm_select(rdev, rdev->ring[idx].me,
4931                                 rdev->ring[idx].pipe,
4932                                 rdev->ring[idx].queue, 0);
4933
4934                 /* disable wptr polling */
4935                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4936                 tmp &= ~WPTR_POLL_EN;
4937                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4938
4939                 /* enable doorbell? */
4940                 mqd->queue_state.cp_hqd_pq_doorbell_control =
4941                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4942                 if (use_doorbell)
4943                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4944                 else
4945                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4946                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4947                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4948
4949                 /* disable the queue if it's active */
4950                 mqd->queue_state.cp_hqd_dequeue_request = 0;
4951                 mqd->queue_state.cp_hqd_pq_rptr = 0;
4952                 mqd->queue_state.cp_hqd_pq_wptr= 0;
4953                 if (RREG32(CP_HQD_ACTIVE) & 1) {
4954                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4955                         for (j = 0; j < rdev->usec_timeout; j++) {
4956                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4957                                         break;
4958                                 udelay(1);
4959                         }
4960                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4961                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4962                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4963                 }
4964
4965                 /* set the pointer to the MQD */
4966                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4967                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4968                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4969                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4970                 /* set MQD vmid to 0 */
4971                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4972                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4973                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4974
4975                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4976                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4977                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4978                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4979                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4980                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4981
4982                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4983                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4984                 mqd->queue_state.cp_hqd_pq_control &=
4985                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4986
4987                 mqd->queue_state.cp_hqd_pq_control |=
4988                         order_base_2(rdev->ring[idx].ring_size / 8);
4989                 mqd->queue_state.cp_hqd_pq_control |=
4990                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4991 #ifdef __BIG_ENDIAN
4992                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4993 #endif
4994                 mqd->queue_state.cp_hqd_pq_control &=
4995                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4996                 mqd->queue_state.cp_hqd_pq_control |=
4997                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4998                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4999
5000                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
5001                 if (i == 0)
5002                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
5003                 else
5004                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
5005                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
5006                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
5007                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
5008                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
5009                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
5010
5011                 /* set the wb address wether it's enabled or not */
5012                 if (i == 0)
5013                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
5014                 else
5015                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
5016                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
5017                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
5018                         upper_32_bits(wb_gpu_addr) & 0xffff;
5019                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
5020                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
5021                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5022                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
5023
5024                 /* enable the doorbell if requested */
5025                 if (use_doorbell) {
5026                         mqd->queue_state.cp_hqd_pq_doorbell_control =
5027                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5028                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
5029                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
5030                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
5031                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5032                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
5033                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
5034
5035                 } else {
5036                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
5037                 }
5038                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5039                        mqd->queue_state.cp_hqd_pq_doorbell_control);
5040
5041                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5042                 rdev->ring[idx].wptr = 0;
5043                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
5044                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5045                 mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
5046
5047                 /* set the vmid for the queue */
5048                 mqd->queue_state.cp_hqd_vmid = 0;
5049                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
5050
5051                 /* activate the queue */
5052                 mqd->queue_state.cp_hqd_active = 1;
5053                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
5054
5055                 cik_srbm_select(rdev, 0, 0, 0, 0);
5056                 mutex_unlock(&rdev->srbm_mutex);
5057
5058                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
5059                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
5060
5061                 rdev->ring[idx].ready = true;
5062                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
5063                 if (r)
5064                         rdev->ring[idx].ready = false;
5065         }
5066
5067         return 0;
5068 }
5069
/* Enable/disable both the gfx and compute command processors. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
5075
/*
 * cik_cp_load_microcode - load the gfx then the compute CP microcode
 *
 * @rdev: radeon_device pointer
 *
 * Returns 0 on success, or the first failing loader's error code.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r == 0)
		r = cik_cp_compute_load_microcode(rdev);

	return r;
}
5089
/* Tear down the gfx ring and then the compute queues. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
5095
5096 static int cik_cp_resume(struct radeon_device *rdev)
5097 {
5098         int r;
5099
5100         cik_enable_gui_idle_interrupt(rdev, false);
5101
5102         r = cik_cp_load_microcode(rdev);
5103         if (r)
5104                 return r;
5105
5106         r = cik_cp_gfx_resume(rdev);
5107         if (r)
5108                 return r;
5109         r = cik_cp_compute_resume(rdev);
5110         if (r)
5111                 return r;
5112
5113         cik_enable_gui_idle_interrupt(rdev, true);
5114
5115         return 0;
5116 }
5117
/* Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log for
 * GPU-hang diagnostics (called before and after a soft reset).
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
5157
5158 /**
5159  * cik_gpu_check_soft_reset - check which blocks are busy
5160  *
5161  * @rdev: radeon_device pointer
5162  *
5163  * Check which blocks are busy and return the relevant reset
5164  * mask to be used by cik_gpu_soft_reset().
5165  * Returns a mask of the blocks to be reset.
5166  */
5167 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5168 {
5169         u32 reset_mask = 0;
5170         u32 tmp;
5171
5172         /* GRBM_STATUS */
5173         tmp = RREG32(GRBM_STATUS);
5174         if (tmp & (PA_BUSY | SC_BUSY |
5175                    BCI_BUSY | SX_BUSY |
5176                    TA_BUSY | VGT_BUSY |
5177                    DB_BUSY | CB_BUSY |
5178                    GDS_BUSY | SPI_BUSY |
5179                    IA_BUSY | IA_BUSY_NO_DMA))
5180                 reset_mask |= RADEON_RESET_GFX;
5181
5182         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5183                 reset_mask |= RADEON_RESET_CP;
5184
5185         /* GRBM_STATUS2 */
5186         tmp = RREG32(GRBM_STATUS2);
5187         if (tmp & RLC_BUSY)
5188                 reset_mask |= RADEON_RESET_RLC;
5189
5190         /* SDMA0_STATUS_REG */
5191         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5192         if (!(tmp & SDMA_IDLE))
5193                 reset_mask |= RADEON_RESET_DMA;
5194
5195         /* SDMA1_STATUS_REG */
5196         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5197         if (!(tmp & SDMA_IDLE))
5198                 reset_mask |= RADEON_RESET_DMA1;
5199
5200         /* SRBM_STATUS2 */
5201         tmp = RREG32(SRBM_STATUS2);
5202         if (tmp & SDMA_BUSY)
5203                 reset_mask |= RADEON_RESET_DMA;
5204
5205         if (tmp & SDMA1_BUSY)
5206                 reset_mask |= RADEON_RESET_DMA1;
5207
5208         /* SRBM_STATUS */
5209         tmp = RREG32(SRBM_STATUS);
5210
5211         if (tmp & IH_BUSY)
5212                 reset_mask |= RADEON_RESET_IH;
5213
5214         if (tmp & SEM_BUSY)
5215                 reset_mask |= RADEON_RESET_SEM;
5216
5217         if (tmp & GRBM_RQ_PENDING)
5218                 reset_mask |= RADEON_RESET_GRBM;
5219
5220         if (tmp & VMC_BUSY)
5221                 reset_mask |= RADEON_RESET_VMC;
5222
5223         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5224                    MCC_BUSY | MCD_BUSY))
5225                 reset_mask |= RADEON_RESET_MC;
5226
5227         if (evergreen_is_display_hung(rdev))
5228                 reset_mask |= RADEON_RESET_DISPLAY;
5229
5230         /* Skip MC reset as it's mostly likely not hung, just busy */
5231         if (reset_mask & RADEON_RESET_MC) {
5232                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5233                 reset_mask &= ~RADEON_RESET_MC;
5234         }
5235
5236         return reset_mask;
5237 }
5238
5239 /**
5240  * cik_gpu_soft_reset - soft reset GPU
5241  *
5242  * @rdev: radeon_device pointer
5243  * @reset_mask: mask of which blocks to reset
5244  *
5245  * Soft reset the blocks specified in @reset_mask.
5246  */
5247 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
5248 {
5249         struct evergreen_mc_save save;
5250         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5251         u32 tmp;
5252
5253         if (reset_mask == 0)
5254                 return;
5255
5256         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
5257
5258         cik_print_gpu_status_regs(rdev);
5259         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5260                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5261         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5262                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5263
5264         /* disable CG/PG */
5265         cik_fini_pg(rdev);
5266         cik_fini_cg(rdev);
5267
5268         /* stop the rlc */
5269         cik_rlc_stop(rdev);
5270
5271         /* Disable GFX parsing/prefetching */
5272         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5273
5274         /* Disable MEC parsing/prefetching */
5275         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5276
5277         if (reset_mask & RADEON_RESET_DMA) {
5278                 /* sdma0 */
5279                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5280                 tmp |= SDMA_HALT;
5281                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5282         }
5283         if (reset_mask & RADEON_RESET_DMA1) {
5284                 /* sdma1 */
5285                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5286                 tmp |= SDMA_HALT;
5287                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5288         }
5289
5290         evergreen_mc_stop(rdev, &save);
5291         if (evergreen_mc_wait_for_idle(rdev)) {
5292                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5293         }
5294
5295         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5296                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5297
5298         if (reset_mask & RADEON_RESET_CP) {
5299                 grbm_soft_reset |= SOFT_RESET_CP;
5300
5301                 srbm_soft_reset |= SOFT_RESET_GRBM;
5302         }
5303
5304         if (reset_mask & RADEON_RESET_DMA)
5305                 srbm_soft_reset |= SOFT_RESET_SDMA;
5306
5307         if (reset_mask & RADEON_RESET_DMA1)
5308                 srbm_soft_reset |= SOFT_RESET_SDMA1;
5309
5310         if (reset_mask & RADEON_RESET_DISPLAY)
5311                 srbm_soft_reset |= SOFT_RESET_DC;
5312
5313         if (reset_mask & RADEON_RESET_RLC)
5314                 grbm_soft_reset |= SOFT_RESET_RLC;
5315
5316         if (reset_mask & RADEON_RESET_SEM)
5317                 srbm_soft_reset |= SOFT_RESET_SEM;
5318
5319         if (reset_mask & RADEON_RESET_IH)
5320                 srbm_soft_reset |= SOFT_RESET_IH;
5321
5322         if (reset_mask & RADEON_RESET_GRBM)
5323                 srbm_soft_reset |= SOFT_RESET_GRBM;
5324
5325         if (reset_mask & RADEON_RESET_VMC)
5326                 srbm_soft_reset |= SOFT_RESET_VMC;
5327
5328         if (!(rdev->flags & RADEON_IS_IGP)) {
5329                 if (reset_mask & RADEON_RESET_MC)
5330                         srbm_soft_reset |= SOFT_RESET_MC;
5331         }
5332
5333         if (grbm_soft_reset) {
5334                 tmp = RREG32(GRBM_SOFT_RESET);
5335                 tmp |= grbm_soft_reset;
5336                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5337                 WREG32(GRBM_SOFT_RESET, tmp);
5338                 tmp = RREG32(GRBM_SOFT_RESET);
5339
5340                 udelay(50);
5341
5342                 tmp &= ~grbm_soft_reset;
5343                 WREG32(GRBM_SOFT_RESET, tmp);
5344                 tmp = RREG32(GRBM_SOFT_RESET);
5345         }
5346
5347         if (srbm_soft_reset) {
5348                 tmp = RREG32(SRBM_SOFT_RESET);
5349                 tmp |= srbm_soft_reset;
5350                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5351                 WREG32(SRBM_SOFT_RESET, tmp);
5352                 tmp = RREG32(SRBM_SOFT_RESET);
5353
5354                 udelay(50);
5355
5356                 tmp &= ~srbm_soft_reset;
5357                 WREG32(SRBM_SOFT_RESET, tmp);
5358                 tmp = RREG32(SRBM_SOFT_RESET);
5359         }
5360
5361         /* Wait a little for things to settle down */
5362         udelay(50);
5363
5364         evergreen_mc_resume(rdev, &save);
5365         udelay(50);
5366
5367         cik_print_gpu_status_regs(rdev);
5368 }
5369
/* GMCON register state saved across the kv_* GPU reset sequence. */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;	/* saved here; restored by the matching restore path */
};
5375
/*
 * kv_save_regs_for_reset - save GMCON state before a GPU reset
 *
 * @rdev: radeon_device pointer
 * @save: storage for the saved register values
 *
 * Saves GMCON_RENG_EXECUTE, GMCON_MISC and GMCON_MISC3, then masks the
 * render-engine auto-execute and stutter bits so they are disabled
 * while the reset is in progress.
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5387
/**
 * kv_restore_regs_for_reset - restore GMCON registers after a pci config reset
 *
 * @rdev: radeon_device pointer
 * @save: register values previously captured by kv_save_regs_for_reset()
 *
 * Replays a fixed GMCON_PGFSM_CONFIG/GMCON_PGFSM_WRITE programming
 * sequence and then restores the saved GMCON registers.  The config
 * values and the write pattern (one seed write, five zero writes per
 * stage) are hardware-specific magic -- do not reorder or coalesce
 * these writes.  Only used on IGP (KV/KB) parts.
 */
static void kv_restore_regs_for_reset(struct radeon_device *rdev,
				      struct kv_reset_save_regs *save)
{
	int i;

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x210000);
	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x21003);
	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x420000);
	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x120202);
	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);

	/* finally put back the registers saved before the reset */
	WREG32(GMCON_MISC3, save->gmcon_misc3);
	WREG32(GMCON_MISC, save->gmcon_misc);
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
}
5460
/**
 * cik_gpu_pci_config_reset - reset the asic via the pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Heavier-weight reset than a soft reset: halts all engines (CP, MEC,
 * SDMA, RLC), stops memory access, then triggers a full asic reset
 * through pci config space and waits for the chip to come back.
 * On IGP parts the GMCON registers are saved/restored around the
 * reset since they are lost otherwise.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* IGP: preserve GMCON state across the reset */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset;
	 * CONFIG_MEMSIZE reads back all-ones while the chip is in reset
	 */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5523
5524 /**
5525  * cik_asic_reset - soft reset GPU
5526  *
5527  * @rdev: radeon_device pointer
5528  *
5529  * Look up which blocks are hung and attempt
5530  * to reset them.
5531  * Returns 0 for success.
5532  */
5533 int cik_asic_reset(struct radeon_device *rdev)
5534 {
5535         u32 reset_mask;
5536
5537         reset_mask = cik_gpu_check_soft_reset(rdev);
5538
5539         if (reset_mask)
5540                 r600_set_bios_scratch_engine_hung(rdev, true);
5541
5542         /* try soft reset */
5543         cik_gpu_soft_reset(rdev, reset_mask);
5544
5545         reset_mask = cik_gpu_check_soft_reset(rdev);
5546
5547         /* try pci config reset */
5548         if (reset_mask && radeon_hard_reset)
5549                 cik_gpu_pci_config_reset(rdev);
5550
5551         reset_mask = cik_gpu_check_soft_reset(rdev);
5552
5553         if (!reset_mask)
5554                 r600_set_bios_scratch_engine_hung(rdev, false);
5555
5556         return 0;
5557 }
5558
5559 /**
5560  * cik_gfx_is_lockup - check if the 3D engine is locked up
5561  *
5562  * @rdev: radeon_device pointer
5563  * @ring: radeon_ring structure holding ring information
5564  *
5565  * Check if the 3D engine is locked up (CIK).
5566  * Returns true if the engine is locked, false if not.
5567  */
5568 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5569 {
5570         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5571
5572         if (!(reset_mask & (RADEON_RESET_GFX |
5573                             RADEON_RESET_COMPUTE |
5574                             RADEON_RESET_CP))) {
5575                 radeon_ring_lockup_update(rdev, ring);
5576                 return false;
5577         }
5578         return radeon_ring_test_lockup(rdev, ring);
5579 }
5580
5581 /* MC */
5582 /**
5583  * cik_mc_program - program the GPU memory controller
5584  *
5585  * @rdev: radeon_device pointer
5586  *
5587  * Set the location of vram, gart, and AGP in the GPU's
5588  * physical address space (CIK).
5589  */
5590 static void cik_mc_program(struct radeon_device *rdev)
5591 {
5592         struct evergreen_mc_save save;
5593         u32 tmp;
5594         int i, j;
5595
5596         /* Initialize HDP */
5597         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5598                 WREG32((0x2c14 + j), 0x00000000);
5599                 WREG32((0x2c18 + j), 0x00000000);
5600                 WREG32((0x2c1c + j), 0x00000000);
5601                 WREG32((0x2c20 + j), 0x00000000);
5602                 WREG32((0x2c24 + j), 0x00000000);
5603         }
5604         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5605
5606         evergreen_mc_stop(rdev, &save);
5607         if (radeon_mc_wait_for_idle(rdev)) {
5608                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5609         }
5610         /* Lockout access through VGA aperture*/
5611         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5612         /* Update configuration */
5613         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5614                rdev->mc.vram_start >> 12);
5615         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5616                rdev->mc.vram_end >> 12);
5617         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5618                rdev->vram_scratch.gpu_addr >> 12);
5619         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5620         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5621         WREG32(MC_VM_FB_LOCATION, tmp);
5622         /* XXX double check these! */
5623         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5624         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5625         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5626         WREG32(MC_VM_AGP_BASE, 0);
5627         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5628         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5629         if (radeon_mc_wait_for_idle(rdev)) {
5630                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5631         }
5632         evergreen_mc_resume(rdev, &save);
5633         /* we need to own VRAM, so turn off the VGA renderer here
5634          * to stop it overwriting our objects */
5635         rv515_vga_render_disable(rdev);
5636 }
5637
5638 /**
5639  * cik_mc_init - initialize the memory controller driver params
5640  *
5641  * @rdev: radeon_device pointer
5642  *
5643  * Look up the amount of vram, vram width, and decide how to place
5644  * vram and gart within the GPU's physical address space (CIK).
5645  * Returns 0 for success.
5646  */
5647 static int cik_mc_init(struct radeon_device *rdev)
5648 {
5649         u32 tmp;
5650         int chansize, numchan;
5651
5652         /* Get VRAM informations */
5653         rdev->mc.vram_is_ddr = true;
5654         tmp = RREG32(MC_ARB_RAMCFG);
5655         if (tmp & CHANSIZE_MASK) {
5656                 chansize = 64;
5657         } else {
5658                 chansize = 32;
5659         }
5660         tmp = RREG32(MC_SHARED_CHMAP);
5661         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5662         case 0:
5663         default:
5664                 numchan = 1;
5665                 break;
5666         case 1:
5667                 numchan = 2;
5668                 break;
5669         case 2:
5670                 numchan = 4;
5671                 break;
5672         case 3:
5673                 numchan = 8;
5674                 break;
5675         case 4:
5676                 numchan = 3;
5677                 break;
5678         case 5:
5679                 numchan = 6;
5680                 break;
5681         case 6:
5682                 numchan = 10;
5683                 break;
5684         case 7:
5685                 numchan = 12;
5686                 break;
5687         case 8:
5688                 numchan = 16;
5689                 break;
5690         }
5691         rdev->mc.vram_width = numchan * chansize;
5692         /* Could aper size report 0 ? */
5693         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5694         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5695         /* size in MB on si */
5696         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5697         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5698         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5699         si_vram_gtt_location(rdev, &rdev->mc);
5700         radeon_update_bandwidth_info(rdev);
5701
5702         return 0;
5703 }
5704
5705 /*
5706  * GART
5707  * VMID 0 is the physical GPU addresses as used by the kernel.
5708  * VMIDs 1-15 are used for userspace clients and are handled
5709  * by the radeon vm/hsa code.
5710  */
5711 /**
5712  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5713  *
5714  * @rdev: radeon_device pointer
5715  *
5716  * Flush the TLB for the VMID 0 page table (CIK).
5717  */
5718 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5719 {
5720         /* flush hdp cache */
5721         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5722
5723         /* bits 0-15 are the VM contexts0-15 */
5724         WREG32(VM_INVALIDATE_REQUEST, 0x1);
5725 }
5726
/* Program the SH_MEM_* aperture registers for the compute VMIDs (8-15)
 * used by amdkfd.  The registers are SRBM-banked per VMID, so each VMID
 * is selected via cik_srbm_select() under srbm_mutex before writing.
 * Private and shared aperture base fields are both set to 0x6000
 * (field encoding is hw-specific -- see SH_MEM_BASES).
 */
static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
{
	int i;
	uint32_t sh_mem_bases, sh_mem_config;

	sh_mem_bases = 0x6000 | 0x6000 << 16;
	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);

	mutex_lock(&rdev->srbm_mutex);
	for (i = 8; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, sh_mem_config);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, sh_mem_bases);
	}
	/* switch back to VMID 0 before releasing the SRBM */
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);
}
5748
5749 /**
5750  * cik_pcie_gart_enable - gart enable
5751  *
5752  * @rdev: radeon_device pointer
5753  *
5754  * This sets up the TLBs, programs the page tables for VMID0,
5755  * sets up the hw for VMIDs 1-15 which are allocated on
5756  * demand, and sets up the global locations for the LDS, GDS,
5757  * and GPUVM for FSA64 clients (CIK).
5758  * Returns 0 for success, errors for failure.
5759  */
5760 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5761 {
5762         int r, i;
5763
5764         if (rdev->gart.robj == NULL) {
5765                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5766                 return -EINVAL;
5767         }
5768         r = radeon_gart_table_vram_pin(rdev);
5769         if (r)
5770                 return r;
5771         /* Setup TLB control */
5772         WREG32(MC_VM_MX_L1_TLB_CNTL,
5773                (0xA << 7) |
5774                ENABLE_L1_TLB |
5775                ENABLE_L1_FRAGMENT_PROCESSING |
5776                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5777                ENABLE_ADVANCED_DRIVER_MODEL |
5778                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5779         /* Setup L2 cache */
5780         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5781                ENABLE_L2_FRAGMENT_PROCESSING |
5782                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5783                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5784                EFFECTIVE_L2_QUEUE_SIZE(7) |
5785                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5786         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5787         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5788                BANK_SELECT(4) |
5789                L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5790         /* setup context0 */
5791         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5792         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5793         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5794         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5795                         (u32)(rdev->dummy_page.addr >> 12));
5796         WREG32(VM_CONTEXT0_CNTL2, 0);
5797         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5798                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5799
5800         WREG32(0x15D4, 0);
5801         WREG32(0x15D8, 0);
5802         WREG32(0x15DC, 0);
5803
5804         /* restore context1-15 */
5805         /* set vm size, must be a multiple of 4 */
5806         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5807         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5808         for (i = 1; i < 16; i++) {
5809                 if (i < 8)
5810                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5811                                rdev->vm_manager.saved_table_addr[i]);
5812                 else
5813                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5814                                rdev->vm_manager.saved_table_addr[i]);
5815         }
5816
5817         /* enable context1-15 */
5818         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5819                (u32)(rdev->dummy_page.addr >> 12));
5820         WREG32(VM_CONTEXT1_CNTL2, 4);
5821         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5822                                 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5823                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5824                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5825                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5826                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5827                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5828                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5829                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5830                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5831                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5832                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5833                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5834                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5835
5836         if (rdev->family == CHIP_KAVERI) {
5837                 u32 tmp = RREG32(CHUB_CONTROL);
5838                 tmp &= ~BYPASS_VM;
5839                 WREG32(CHUB_CONTROL, tmp);
5840         }
5841
5842         /* XXX SH_MEM regs */
5843         /* where to put LDS, scratch, GPUVM in FSA64 space */
5844         mutex_lock(&rdev->srbm_mutex);
5845         for (i = 0; i < 16; i++) {
5846                 cik_srbm_select(rdev, 0, 0, 0, i);
5847                 /* CP and shaders */
5848                 WREG32(SH_MEM_CONFIG, 0);
5849                 WREG32(SH_MEM_APE1_BASE, 1);
5850                 WREG32(SH_MEM_APE1_LIMIT, 0);
5851                 WREG32(SH_MEM_BASES, 0);
5852                 /* SDMA GFX */
5853                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5854                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5855                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5856                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5857                 /* XXX SDMA RLC - todo */
5858         }
5859         cik_srbm_select(rdev, 0, 0, 0, 0);
5860         mutex_unlock(&rdev->srbm_mutex);
5861
5862         cik_pcie_init_compute_vmid(rdev);
5863
5864         cik_pcie_gart_tlb_flush(rdev);
5865         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5866                  (unsigned)(rdev->mc.gtt_size >> 20),
5867                  (unsigned long long)rdev->gart.table_addr);
5868         rdev->gart.ready = true;
5869         return 0;
5870 }
5871
5872 /**
5873  * cik_pcie_gart_disable - gart disable
5874  *
5875  * @rdev: radeon_device pointer
5876  *
5877  * This disables all VM page table (CIK).
5878  */
5879 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5880 {
5881         unsigned i;
5882
5883         for (i = 1; i < 16; ++i) {
5884                 uint32_t reg;
5885                 if (i < 8)
5886                         reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5887                 else
5888                         reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5889                 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5890         }
5891
5892         /* Disable all tables */
5893         WREG32(VM_CONTEXT0_CNTL, 0);
5894         WREG32(VM_CONTEXT1_CNTL, 0);
5895         /* Setup TLB control */
5896         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5897                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5898         /* Setup L2 cache */
5899         WREG32(VM_L2_CNTL,
5900                ENABLE_L2_FRAGMENT_PROCESSING |
5901                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5902                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5903                EFFECTIVE_L2_QUEUE_SIZE(7) |
5904                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5905         WREG32(VM_L2_CNTL2, 0);
5906         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5907                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5908         radeon_gart_table_vram_unpin(rdev);
5909 }
5910
5911 /**
5912  * cik_pcie_gart_fini - vm fini callback
5913  *
5914  * @rdev: radeon_device pointer
5915  *
5916  * Tears down the driver GART/VM setup (CIK).
5917  */
5918 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5919 {
5920         cik_pcie_gart_disable(rdev);
5921         radeon_gart_table_vram_free(rdev);
5922         radeon_gart_fini(rdev);
5923 }
5924
5925 /* vm parser */
5926 /**
5927  * cik_ib_parse - vm ib_parse callback
5928  *
5929  * @rdev: radeon_device pointer
5930  * @ib: indirect buffer pointer
5931  *
5932  * CIK uses hw IB checking so this is a nop (CIK).
5933  */
5934 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5935 {
5936         return 0;
5937 }
5938
5939 /*
5940  * vm
5941  * VMID 0 is the physical GPU addresses as used by the kernel.
5942  * VMIDs 1-15 are used for userspace clients and are handled
5943  * by the radeon vm/hsa code.
5944  */
5945 /**
5946  * cik_vm_init - cik vm init callback
5947  *
5948  * @rdev: radeon_device pointer
5949  *
5950  * Inits cik specific vm parameters (number of VMs, base of vram for
5951  * VMIDs 1-15) (CIK).
5952  * Returns 0 for success.
5953  */
5954 int cik_vm_init(struct radeon_device *rdev)
5955 {
5956         /*
5957          * number of VMs
5958          * VMID 0 is reserved for System
5959          * radeon graphics/compute will use VMIDs 1-7
5960          * amdkfd will use VMIDs 8-15
5961          */
5962         rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5963         /* base offset of vram pages */
5964         if (rdev->flags & RADEON_IS_IGP) {
5965                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5966                 tmp <<= 22;
5967                 rdev->vm_manager.vram_base_offset = tmp;
5968         } else
5969                 rdev->vm_manager.vram_base_offset = 0;
5970
5971         return 0;
5972 }
5973
5974 /**
5975  * cik_vm_fini - cik vm fini callback
5976  *
5977  * @rdev: radeon_device pointer
5978  *
5979  * Tear down any asic specific VM setup (CIK).
5980  */
5981 void cik_vm_fini(struct radeon_device *rdev)
5982 {
5983 }
5984
5985 /**
5986  * cik_vm_decode_fault - print human readable fault info
5987  *
5988  * @rdev: radeon_device pointer
5989  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5990  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5991  *
5992  * Print human readable fault information (CIK).
5993  */
5994 static void cik_vm_decode_fault(struct radeon_device *rdev,
5995                                 u32 status, u32 addr, u32 mc_client)
5996 {
5997         u32 mc_id;
5998         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5999         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
6000         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
6001                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
6002
6003         if (rdev->family == CHIP_HAWAII)
6004                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6005         else
6006                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6007
6008         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
6009                protections, vmid, addr,
6010                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
6011                block, mc_client, mc_id);
6012 }
6013
6014 /**
6015  * cik_vm_flush - cik vm flush using the CP
6016  *
6017  * @rdev: radeon_device pointer
6018  *
6019  * Update the page table base and flush the VM TLB
6020  * using the CP (CIK).
6021  */
6022 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
6023                   unsigned vm_id, uint64_t pd_addr)
6024 {
6025         int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
6026
6027         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6028         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6029                                  WRITE_DATA_DST_SEL(0)));
6030         if (vm_id < 8) {
6031                 radeon_ring_write(ring,
6032                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
6033         } else {
6034                 radeon_ring_write(ring,
6035                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
6036         }
6037         radeon_ring_write(ring, 0);
6038         radeon_ring_write(ring, pd_addr >> 12);
6039
6040         /* update SH_MEM_* regs */
6041         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6042         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6043                                  WRITE_DATA_DST_SEL(0)));
6044         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6045         radeon_ring_write(ring, 0);
6046         radeon_ring_write(ring, VMID(vm_id));
6047
6048         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
6049         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6050                                  WRITE_DATA_DST_SEL(0)));
6051         radeon_ring_write(ring, SH_MEM_BASES >> 2);
6052         radeon_ring_write(ring, 0);
6053
6054         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
6055         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
6056         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
6057         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
6058
6059         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6060         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6061                                  WRITE_DATA_DST_SEL(0)));
6062         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6063         radeon_ring_write(ring, 0);
6064         radeon_ring_write(ring, VMID(0));
6065
6066         /* HDP flush */
6067         cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
6068
6069         /* bits 0-15 are the VM contexts0-15 */
6070         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6071         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6072                                  WRITE_DATA_DST_SEL(0)));
6073         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6074         radeon_ring_write(ring, 0);
6075         radeon_ring_write(ring, 1 << vm_id);
6076
6077         /* wait for the invalidate to complete */
6078         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6079         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6080                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6081                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6082         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6083         radeon_ring_write(ring, 0);
6084         radeon_ring_write(ring, 0); /* ref */
6085         radeon_ring_write(ring, 0); /* mask */
6086         radeon_ring_write(ring, 0x20); /* poll interval */
6087
6088         /* compute doesn't have PFP */
6089         if (usepfp) {
6090                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6091                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6092                 radeon_ring_write(ring, 0x0);
6093         }
6094 }
6095
6096 /*
6097  * RLC
6098  * The RLC is a multi-purpose microengine that handles a
6099  * variety of functions, the most important of which is
6100  * the interrupt controller.
6101  */
6102 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6103                                           bool enable)
6104 {
6105         u32 tmp = RREG32(CP_INT_CNTL_RING0);
6106
6107         if (enable)
6108                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6109         else
6110                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6111         WREG32(CP_INT_CNTL_RING0, tmp);
6112 }
6113
6114 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6115 {
6116         u32 tmp;
6117
6118         tmp = RREG32(RLC_LB_CNTL);
6119         if (enable)
6120                 tmp |= LOAD_BALANCE_ENABLE;
6121         else
6122                 tmp &= ~LOAD_BALANCE_ENABLE;
6123         WREG32(RLC_LB_CNTL, tmp);
6124 }
6125
/* Wait (up to rdev->usec_timeout iterations per check) for the RLC
 * serdes CU masters on every SE/SH to go idle, then for the non-CU
 * masters.  grbm_idx_mutex guards the SE/SH selection register.
 */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* broadcast mode: select all SEs/SHs again */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
6152
6153 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6154 {
6155         u32 tmp;
6156
6157         tmp = RREG32(RLC_CNTL);
6158         if (tmp != rlc)
6159                 WREG32(RLC_CNTL, rlc);
6160 }
6161
/* Halt the RLC if it is running: clear RLC_ENABLE, wait for the GPM
 * to go idle (bounded by rdev->usec_timeout), then wait for the serdes
 * masters.  Returns the original RLC_CNTL value so the caller can
 * restore it later via cik_update_rlc().
 */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
6185
/* Ask the RLC to enter safe mode and wait for it to get there:
 * first wait for the power/clock status bits, then for the RLC to
 * acknowledge the request (REQ bit cleared).  Both waits are bounded
 * by rdev->usec_timeout and fall through silently on timeout.
 */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
6206
6207 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6208 {
6209         u32 tmp;
6210
6211         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6212         WREG32(RLC_GPR_REG2, tmp);
6213 }
6214
6215 /**
6216  * cik_rlc_stop - stop the RLC ME
6217  *
6218  * @rdev: radeon_device pointer
6219  *
6220  * Halt the RLC ME (MicroEngine) (CIK).
6221  */
6222 static void cik_rlc_stop(struct radeon_device *rdev)
6223 {
6224         WREG32(RLC_CNTL, 0);
6225
6226         cik_enable_gui_idle_interrupt(rdev, false);
6227
6228         cik_wait_for_rlc_serdes(rdev);
6229 }
6230
6231 /**
6232  * cik_rlc_start - start the RLC ME
6233  *
6234  * @rdev: radeon_device pointer
6235  *
6236  * Unhalt the RLC ME (MicroEngine) (CIK).
6237  */
6238 static void cik_rlc_start(struct radeon_device *rdev)
6239 {
6240         WREG32(RLC_CNTL, RLC_ENABLE);
6241
6242         cik_enable_gui_idle_interrupt(rdev, true);
6243
6244         udelay(50);
6245 }
6246
6247 /**
6248  * cik_rlc_resume - setup the RLC hw
6249  *
6250  * @rdev: radeon_device pointer
6251  *
6252  * Initialize the RLC registers, load the ucode,
6253  * and start the RLC (CIK).
6254  * Returns 0 for success, -EINVAL if the ucode is not available.
6255  */
6256 static int cik_rlc_resume(struct radeon_device *rdev)
6257 {
6258         u32 i, size, tmp;
6259
6260         if (!rdev->rlc_fw)
6261                 return -EINVAL;
6262
6263         cik_rlc_stop(rdev);
6264
6265         /* disable CG */
6266         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6267         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6268
6269         si_rlc_reset(rdev);
6270
6271         cik_init_pg(rdev);
6272
6273         cik_init_cg(rdev);
6274
6275         WREG32(RLC_LB_CNTR_INIT, 0);
6276         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6277
6278         mutex_lock(&rdev->grbm_idx_mutex);
6279         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6280         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6281         WREG32(RLC_LB_PARAMS, 0x00600408);
6282         WREG32(RLC_LB_CNTL, 0x80000004);
6283         mutex_unlock(&rdev->grbm_idx_mutex);
6284
6285         WREG32(RLC_MC_CNTL, 0);
6286         WREG32(RLC_UCODE_CNTL, 0);
6287
6288         if (rdev->new_fw) {
6289                 const struct rlc_firmware_header_v1_0 *hdr =
6290                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6291                 const __le32 *fw_data = (const __le32 *)
6292                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6293
6294                 radeon_ucode_print_rlc_hdr(&hdr->header);
6295
6296                 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6297                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6298                 for (i = 0; i < size; i++)
6299                         WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6300                 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6301         } else {
6302                 const __be32 *fw_data;
6303
6304                 switch (rdev->family) {
6305                 case CHIP_BONAIRE:
6306                 case CHIP_HAWAII:
6307                 default:
6308                         size = BONAIRE_RLC_UCODE_SIZE;
6309                         break;
6310                 case CHIP_KAVERI:
6311                         size = KV_RLC_UCODE_SIZE;
6312                         break;
6313                 case CHIP_KABINI:
6314                         size = KB_RLC_UCODE_SIZE;
6315                         break;
6316                 case CHIP_MULLINS:
6317                         size = ML_RLC_UCODE_SIZE;
6318                         break;
6319                 }
6320
6321                 fw_data = (const __be32 *)rdev->rlc_fw->data;
6322                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6323                 for (i = 0; i < size; i++)
6324                         WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6325                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6326         }
6327
6328         /* XXX - find out what chips support lbpw */
6329         cik_enable_lbpw(rdev, false);
6330
6331         if (rdev->family == CHIP_BONAIRE)
6332                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
6333
6334         cik_rlc_start(rdev);
6335
6336         return 0;
6337 }
6338
/* Enable/disable coarse grain clock gating (CGCG) and coarse grain
 * light sleep (CGLS) for the GFX block via RLC_CGCG_CGLS_CTRL.  On
 * enable, the RLC is halted while the serdes masks are reprogrammed,
 * then restored with cik_update_rlc().
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		tmp = cik_halt_rlc(rdev);

		/* broadcast the serdes write to all SEs/SHs */
		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* NOTE(review): back-to-back dummy reads — presumably a
		 * posting/flush sequence before disabling gating; confirm
		 * before simplifying */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6376
/* Enable/disable medium grain clock gating (MGCG) and the related
 * CP/RLC memory light-sleep and CGTS shader gating features for the
 * GFX block.  On enable the serdes are programmed with MGCG_OVERRIDE_0;
 * on disable every light-sleep bit is cleared, the overrides are forced
 * on, and the serdes get MGCG_OVERRIDE_1.  The RLC is halted around the
 * serdes writes and restored with cik_update_rlc().
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* enable CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = cik_halt_rlc(rdev);

		/* broadcast the serdes write to all SEs/SHs */
		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* take RLC memory out of light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		/* take CP memory out of light sleep */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);
	}
}
6460
/* MC/ATC/VM registers carrying both the MC_CG_ENABLE (medium grain
 * clock gating) and MC_LS_ENABLE (light sleep) control bits; walked by
 * cik_enable_mc_mgcg() and cik_enable_mc_ls() below.
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6473
6474 static void cik_enable_mc_ls(struct radeon_device *rdev,
6475                              bool enable)
6476 {
6477         int i;
6478         u32 orig, data;
6479
6480         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6481                 orig = data = RREG32(mc_cg_registers[i]);
6482                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6483                         data |= MC_LS_ENABLE;
6484                 else
6485                         data &= ~MC_LS_ENABLE;
6486                 if (data != orig)
6487                         WREG32(mc_cg_registers[i], data);
6488         }
6489 }
6490
6491 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6492                                bool enable)
6493 {
6494         int i;
6495         u32 orig, data;
6496
6497         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6498                 orig = data = RREG32(mc_cg_registers[i]);
6499                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6500                         data |= MC_CG_ENABLE;
6501                 else
6502                         data &= ~MC_CG_ENABLE;
6503                 if (data != orig)
6504                         WREG32(mc_cg_registers[i], data);
6505         }
6506 }
6507
6508 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6509                                  bool enable)
6510 {
6511         u32 orig, data;
6512
6513         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6514                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6515                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6516         } else {
6517                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6518                 data |= 0xff000000;
6519                 if (data != orig)
6520                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6521
6522                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6523                 data |= 0xff000000;
6524                 if (data != orig)
6525                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6526         }
6527 }
6528
6529 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6530                                  bool enable)
6531 {
6532         u32 orig, data;
6533
6534         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6535                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6536                 data |= 0x100;
6537                 if (orig != data)
6538                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6539
6540                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6541                 data |= 0x100;
6542                 if (orig != data)
6543                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6544         } else {
6545                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6546                 data &= ~0x100;
6547                 if (orig != data)
6548                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6549
6550                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6551                 data &= ~0x100;
6552                 if (orig != data)
6553                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6554         }
6555 }
6556
/* Enable/disable UVD medium grain clock gating: sets/clears the memory
 * gating field (0xfff) in UVD_CGC_MEM_CTRL and the DCM bit in
 * UVD_CGC_CTRL.
 */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		/* NOTE(review): the value read here is immediately
		 * overwritten with 0xfff; the read may only matter for a
		 * hardware side effect (if any) — confirm before removing */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6582
6583 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6584                                bool enable)
6585 {
6586         u32 orig, data;
6587
6588         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6589
6590         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6591                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6592                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6593         else
6594                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6595                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6596
6597         if (orig != data)
6598                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6599 }
6600
6601 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6602                                 bool enable)
6603 {
6604         u32 orig, data;
6605
6606         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6607
6608         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6609                 data &= ~CLOCK_GATING_DIS;
6610         else
6611                 data |= CLOCK_GATING_DIS;
6612
6613         if (orig != data)
6614                 WREG32(HDP_HOST_PATH_CNTL, data);
6615 }
6616
6617 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6618                               bool enable)
6619 {
6620         u32 orig, data;
6621
6622         orig = data = RREG32(HDP_MEM_POWER_LS);
6623
6624         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6625                 data |= HDP_LS_ENABLE;
6626         else
6627                 data &= ~HDP_LS_ENABLE;
6628
6629         if (orig != data)
6630                 WREG32(HDP_MEM_POWER_LS, data);
6631 }
6632
/* cik_update_cg - enable/disable clock gating for a set of IP blocks
 *
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* blocks to update
 * @enable: enable or disable clock gating
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! MGCG before CGCG on enable,
		 * reverse on disable */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC gating is not touched on IGPs */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6680
/* Enable clock gating on all supported blocks.  GFX CG is enabled
 * first; cik_fini_cg() tears everything down in the reverse order.
 */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6695
/* Disable clock gating on all supported blocks, in the reverse order
 * of cik_init_cg(): the non-GFX blocks first, GFX last.
 */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6706
6707 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6708                                           bool enable)
6709 {
6710         u32 data, orig;
6711
6712         orig = data = RREG32(RLC_PG_CNTL);
6713         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6714                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6715         else
6716                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6717         if (orig != data)
6718                 WREG32(RLC_PG_CNTL, data);
6719 }
6720
6721 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6722                                           bool enable)
6723 {
6724         u32 data, orig;
6725
6726         orig = data = RREG32(RLC_PG_CNTL);
6727         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6728                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6729         else
6730                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6731         if (orig != data)
6732                 WREG32(RLC_PG_CNTL, data);
6733 }
6734
6735 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6736 {
6737         u32 data, orig;
6738
6739         orig = data = RREG32(RLC_PG_CNTL);
6740         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6741                 data &= ~DISABLE_CP_PG;
6742         else
6743                 data |= DISABLE_CP_PG;
6744         if (orig != data)
6745                 WREG32(RLC_PG_CNTL, data);
6746 }
6747
6748 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6749 {
6750         u32 data, orig;
6751
6752         orig = data = RREG32(RLC_PG_CNTL);
6753         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6754                 data &= ~DISABLE_GDS_PG;
6755         else
6756                 data |= DISABLE_GDS_PG;
6757         if (orig != data)
6758                 WREG32(RLC_PG_CNTL, data);
6759 }
6760
6761 #define CP_ME_TABLE_SIZE    96
6762 #define CP_ME_TABLE_OFFSET  2048
6763 #define CP_MEC_TABLE_OFFSET 4096
6764
6765 void cik_init_cp_pg_table(struct radeon_device *rdev)
6766 {
6767         volatile u32 *dst_ptr;
6768         int me, i, max_me = 4;
6769         u32 bo_offset = 0;
6770         u32 table_offset, table_size;
6771
6772         if (rdev->family == CHIP_KAVERI)
6773                 max_me = 5;
6774
6775         if (rdev->rlc.cp_table_ptr == NULL)
6776                 return;
6777
6778         /* write the cp table buffer */
6779         dst_ptr = rdev->rlc.cp_table_ptr;
6780         for (me = 0; me < max_me; me++) {
6781                 if (rdev->new_fw) {
6782                         const __le32 *fw_data;
6783                         const struct gfx_firmware_header_v1_0 *hdr;
6784
6785                         if (me == 0) {
6786                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6787                                 fw_data = (const __le32 *)
6788                                         (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6789                                 table_offset = le32_to_cpu(hdr->jt_offset);
6790                                 table_size = le32_to_cpu(hdr->jt_size);
6791                         } else if (me == 1) {
6792                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6793                                 fw_data = (const __le32 *)
6794                                         (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6795                                 table_offset = le32_to_cpu(hdr->jt_offset);
6796                                 table_size = le32_to_cpu(hdr->jt_size);
6797                         } else if (me == 2) {
6798                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6799                                 fw_data = (const __le32 *)
6800                                         (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6801                                 table_offset = le32_to_cpu(hdr->jt_offset);
6802                                 table_size = le32_to_cpu(hdr->jt_size);
6803                         } else if (me == 3) {
6804                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6805                                 fw_data = (const __le32 *)
6806                                         (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6807                                 table_offset = le32_to_cpu(hdr->jt_offset);
6808                                 table_size = le32_to_cpu(hdr->jt_size);
6809                         } else {
6810                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6811                                 fw_data = (const __le32 *)
6812                                         (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6813                                 table_offset = le32_to_cpu(hdr->jt_offset);
6814                                 table_size = le32_to_cpu(hdr->jt_size);
6815                         }
6816
6817                         for (i = 0; i < table_size; i ++) {
6818                                 dst_ptr[bo_offset + i] =
6819                                         cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6820                         }
6821                         bo_offset += table_size;
6822                 } else {
6823                         const __be32 *fw_data;
6824                         table_size = CP_ME_TABLE_SIZE;
6825
6826                         if (me == 0) {
6827                                 fw_data = (const __be32 *)rdev->ce_fw->data;
6828                                 table_offset = CP_ME_TABLE_OFFSET;
6829                         } else if (me == 1) {
6830                                 fw_data = (const __be32 *)rdev->pfp_fw->data;
6831                                 table_offset = CP_ME_TABLE_OFFSET;
6832                         } else if (me == 2) {
6833                                 fw_data = (const __be32 *)rdev->me_fw->data;
6834                                 table_offset = CP_ME_TABLE_OFFSET;
6835                         } else {
6836                                 fw_data = (const __be32 *)rdev->mec_fw->data;
6837                                 table_offset = CP_MEC_TABLE_OFFSET;
6838                         }
6839
6840                         for (i = 0; i < table_size; i ++) {
6841                                 dst_ptr[bo_offset + i] =
6842                                         cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6843                         }
6844                         bo_offset += table_size;
6845                 }
6846         }
6847 }
6848
/* Enable/disable GFX coarse grain powergating (GFX_PG_ENABLE) and the
 * RLC auto-powergate control (AUTO_PG_EN), gated on
 * RADEON_PG_SUPPORT_GFX_PG.
 */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): result unused — presumably kept for a
		 * posting/flush side effect; confirm before removing */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6878
6879 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6880 {
6881         u32 mask = 0, tmp, tmp1;
6882         int i;
6883
6884         mutex_lock(&rdev->grbm_idx_mutex);
6885         cik_select_se_sh(rdev, se, sh);
6886         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6887         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6888         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6889         mutex_unlock(&rdev->grbm_idx_mutex);
6890
6891         tmp &= 0xffff0000;
6892
6893         tmp |= tmp1;
6894         tmp >>= 16;
6895
6896         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6897                 mask <<= 1;
6898                 mask |= 1;
6899         }
6900
6901         return (~tmp) & mask;
6902 }
6903
6904 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6905 {
6906         u32 i, j, k, active_cu_number = 0;
6907         u32 mask, counter, cu_bitmap;
6908         u32 tmp = 0;
6909
6910         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6911                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6912                         mask = 1;
6913                         cu_bitmap = 0;
6914                         counter = 0;
6915                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6916                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6917                                         if (counter < 2)
6918                                                 cu_bitmap |= mask;
6919                                         counter ++;
6920                                 }
6921                                 mask <<= 1;
6922                         }
6923
6924                         active_cu_number += counter;
6925                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6926                 }
6927         }
6928
6929         WREG32(RLC_PG_AO_CU_MASK, tmp);
6930
6931         tmp = RREG32(RLC_MAX_PG_CU);
6932         tmp &= ~MAX_PU_CU_MASK;
6933         tmp |= MAX_PU_CU(active_cu_number);
6934         WREG32(RLC_MAX_PG_CU, tmp);
6935 }
6936
6937 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6938                                        bool enable)
6939 {
6940         u32 data, orig;
6941
6942         orig = data = RREG32(RLC_PG_CNTL);
6943         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6944                 data |= STATIC_PER_CU_PG_ENABLE;
6945         else
6946                 data &= ~STATIC_PER_CU_PG_ENABLE;
6947         if (orig != data)
6948                 WREG32(RLC_PG_CNTL, data);
6949 }
6950
6951 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6952                                         bool enable)
6953 {
6954         u32 data, orig;
6955
6956         orig = data = RREG32(RLC_PG_CNTL);
6957         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6958                 data |= DYN_PER_CU_PG_ENABLE;
6959         else
6960                 data &= ~DYN_PER_CU_PG_ENABLE;
6961         if (orig != data)
6962                 WREG32(RLC_PG_CNTL, data);
6963 }
6964
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D

/* Program the RLC scratch area for GFX clock/power gating: write the
 * clear-state descriptor, upload the save/restore register list, set
 * the save/restore and cp table base addresses and tune the related
 * poll/delay controls.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* clear state descriptor: addr hi, addr lo, size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state buffer: zero out the descriptor */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* base addresses are stored shifted right by 8 */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
7016
/* Toggle all GFX powergating features (coarse grain plus static and
 * dynamic per-CU medium grain) as a group.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
7023
7024 u32 cik_get_csb_size(struct radeon_device *rdev)
7025 {
7026         u32 count = 0;
7027         const struct cs_section_def *sect = NULL;
7028         const struct cs_extent_def *ext = NULL;
7029
7030         if (rdev->rlc.cs_data == NULL)
7031                 return 0;
7032
7033         /* begin clear state */
7034         count += 2;
7035         /* context control state */
7036         count += 3;
7037
7038         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7039                 for (ext = sect->section; ext->extent != NULL; ++ext) {
7040                         if (sect->id == SECT_CONTEXT)
7041                                 count += 2 + ext->reg_count;
7042                         else
7043                                 return 0;
7044                 }
7045         }
7046         /* pa_sc_raster_config/pa_sc_raster_config1 */
7047         count += 4;
7048         /* end clear state */
7049         count += 2;
7050         /* clear state */
7051         count += 2;
7052
7053         return count;
7054 }
7055
/* cik_get_csb_buffer - fill @buffer with the clear state indirect buffer.
 * The layout must match the dword count computed by cik_get_csb_size().
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	/* context control */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* emit a SET_CONTEXT_REG packet per extent; only SECT_CONTEXT
	 * sections are supported (matches cik_get_csb_size()).
	 */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				/* reg_index is an absolute dword offset; convert
				 * to a SET_CONTEXT_REG relative offset */
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* pa_sc_raster_config/pa_sc_raster_config1: asic-specific values */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	/* end clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
7120
/* cik_init_pg - set up and enable powergating if any pg flags are set */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		/* slow the serial clock on power up/down transitions */
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
7135
/* cik_fini_pg - disable powergating (reverse order of cik_init_pg) */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
7146
7147 /*
7148  * Interrupts
7149  * Starting with r6xx, interrupts are handled via a ring buffer.
7150  * Ring buffers are areas of GPU accessible memory that the GPU
7151  * writes interrupt vectors into and the host reads vectors out of.
7152  * There is a rptr (read pointer) that determines where the
7153  * host is currently reading, and a wptr (write pointer)
7154  * which determines where the GPU has written.  When the
7155  * pointers are equal, the ring is idle.  When the GPU
7156  * writes vectors to the ring buffer, it increments the
7157  * wptr.  When there is an interrupt, the host then starts
7158  * fetching commands and processing them until the pointers are
7159  * equal again at which point it updates the rptr.
7160  */
7161
7162 /**
7163  * cik_enable_interrupts - Enable the interrupt ring buffer
7164  *
7165  * @rdev: radeon_device pointer
7166  *
7167  * Enable the interrupt ring buffer (CIK).
7168  */
static void cik_enable_interrupts(struct radeon_device *rdev)
{
	u32 ih_cntl = RREG32(IH_CNTL);
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);

	/* turn on the interrupt controller, then enable the IH ring buffer */
	ih_cntl |= ENABLE_INTR;
	ih_rb_cntl |= IH_RB_ENABLE;
	WREG32(IH_CNTL, ih_cntl);
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	rdev->ih.enabled = true;
}
7180
7181 /**
7182  * cik_disable_interrupts - Disable the interrupt ring buffer
7183  *
7184  * @rdev: radeon_device pointer
7185  *
7186  * Disable the interrupt ring buffer (CIK).
7187  */
static void cik_disable_interrupts(struct radeon_device *rdev)
{
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
	u32 ih_cntl = RREG32(IH_CNTL);

	/* stop the IH ring buffer, then the interrupt controller */
	ih_rb_cntl &= ~IH_RB_ENABLE;
	ih_cntl &= ~ENABLE_INTR;
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	WREG32(IH_CNTL, ih_cntl);
	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);
	rdev->ih.enabled = false;
	/* keep the software rptr in sync with the cleared hw pointers */
	rdev->ih.rptr = 0;
}
7203
7204 /**
7205  * cik_disable_interrupt_state - Disable all interrupt sources
7206  *
7207  * @rdev: radeon_device pointer
7208  *
7209  * Clear all interrupt enable bits used by the driver (CIK).
7210  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: preserve busy/empty context bits, clear the rest */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* SRBM */
	WREG32(SRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: keep only the polarity bit in each HPD control */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
7280
7281 /**
7282  * cik_irq_init - init and enable the interrupt ring
7283  *
7284  * @rdev: radeon_device pointer
7285  *
7286  * Allocate a ring buffer for the interrupt controller,
7287  * enable the RLC, disable interrupts, enable the IH
7288  * ring buffer and enable it (CIK).
 * Called at device load and resume.
7290  * Returns 0 for success, errors for failure.
7291  */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size is programmed as log2 of the dword count */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
7362
7363 /**
7364  * cik_irq_set - enable/disable interrupt sources
7365  *
7366  * @rdev: radeon_device pointer
7367  *
7368  * Enable interrupt sources on the GPU (vblanks, hpd,
7369  * etc.) (CIK).
7370  * Returns 0 for success, errors for failure.
7371  */
int cik_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl;
	u32 cp_m1p0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		cik_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		cik_disable_interrupt_state(rdev);
		return 0;
	}

	/* gfx ring: keep context busy/empty bits, always take priv faults */
	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;

	/* start with all hpd interrupts masked; re-enable below as needed */
	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;

	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	/* compute rings: only ME1 pipe 0 is wired up here; other me/pipe
	 * combinations are rejected with a debug message */
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
		}
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
		}
	}

	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}

	/* vblank interrupts: needed for either vblank events or page flips */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("cik_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("cik_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("cik_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("cik_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("cik_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("cik_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("cik_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("cik_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("cik_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("cik_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("cik_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("cik_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	/* commit the computed enable state to the hardware */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);

	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	/* pflip interrupts are always unmasked on the crtcs present */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}

	WREG32(DC_HPD1_INT_CONTROL, hpd1);
	WREG32(DC_HPD2_INT_CONTROL, hpd2);
	WREG32(DC_HPD3_INT_CONTROL, hpd3);
	WREG32(DC_HPD4_INT_CONTROL, hpd4);
	WREG32(DC_HPD5_INT_CONTROL, hpd5);
	WREG32(DC_HPD6_INT_CONTROL, hpd6);

	/* posting read */
	RREG32(SRBM_STATUS);

	return 0;
}
7563
7564 /**
7565  * cik_irq_ack - ack interrupt sources
7566  *
7567  * @rdev: radeon_device pointer
7568  *
7569  * Ack interrupt sources on the GPU (vblanks, hpd,
7570  * etc.) (CIK).  Certain interrupts sources are sw
7571  * generated and do not require an explicit ack.
7572  */
7573 static inline void cik_irq_ack(struct radeon_device *rdev)
7574 {
7575         u32 tmp;
7576
7577         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7578         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7579         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7580         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7581         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7582         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7583         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7584
7585         rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7586                 EVERGREEN_CRTC0_REGISTER_OFFSET);
7587         rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7588                 EVERGREEN_CRTC1_REGISTER_OFFSET);
7589         if (rdev->num_crtc >= 4) {
7590                 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7591                         EVERGREEN_CRTC2_REGISTER_OFFSET);
7592                 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7593                         EVERGREEN_CRTC3_REGISTER_OFFSET);
7594         }
7595         if (rdev->num_crtc >= 6) {
7596                 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7597                         EVERGREEN_CRTC4_REGISTER_OFFSET);
7598                 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7599                         EVERGREEN_CRTC5_REGISTER_OFFSET);
7600         }
7601
7602         if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7603                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7604                        GRPH_PFLIP_INT_CLEAR);
7605         if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7606                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7607                        GRPH_PFLIP_INT_CLEAR);
7608         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7609                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7610         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7611                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7612         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7613                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7614         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7615                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7616
7617         if (rdev->num_crtc >= 4) {
7618                 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7619                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7620                                GRPH_PFLIP_INT_CLEAR);
7621                 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7622                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7623                                GRPH_PFLIP_INT_CLEAR);
7624                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7625                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7626                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7627                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7628                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7629                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7630                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7631                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7632         }
7633
7634         if (rdev->num_crtc >= 6) {
7635                 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7636                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7637                                GRPH_PFLIP_INT_CLEAR);
7638                 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7639                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7640                                GRPH_PFLIP_INT_CLEAR);
7641                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7642                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7643                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7644                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7645                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7646                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7647                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7648                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7649         }
7650
7651         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7652                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7653                 tmp |= DC_HPDx_INT_ACK;
7654                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7655         }
7656         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7657                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7658                 tmp |= DC_HPDx_INT_ACK;
7659                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7660         }
7661         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7662                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7663                 tmp |= DC_HPDx_INT_ACK;
7664                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7665         }
7666         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7667                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7668                 tmp |= DC_HPDx_INT_ACK;
7669                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7670         }
7671         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7672                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7673                 tmp |= DC_HPDx_INT_ACK;
7674                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7675         }
7676         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7677                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7678                 tmp |= DC_HPDx_INT_ACK;
7679                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7680         }
7681 }
7682
7683 /**
7684  * cik_irq_disable - disable interrupts
7685  *
7686  * @rdev: radeon_device pointer
7687  *
7688  * Disable interrupts on the hw (CIK).
7689  */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	/* clear all interrupt source enable bits */
	cik_disable_interrupt_state(rdev);
}
7698
7699 /**
 * cik_irq_suspend - disable interrupts for suspend
7701  *
7702  * @rdev: radeon_device pointer
7703  *
7704  * Disable interrupts and stop the RLC (CIK).
7705  * Used for suspend.
7706  */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* fully quiesce interrupts, then stop the RLC for suspend */
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7712
7713 /**
7714  * cik_irq_fini - tear down interrupt support
7715  *
7716  * @rdev: radeon_device pointer
7717  *
7718  * Disable interrupts on the hw and free the IH ring
7719  * buffer (CIK).
7720  * Used for driver unload.
7721  */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hw, then free the IH ring memory */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7727
7728 /**
7729  * cik_get_ih_wptr - get the IH ring buffer wptr
7730  *
7731  * @rdev: radeon_device pointer
7732  *
7733  * Get the IH ring buffer wptr from either the register
7734  * or the writeback memory buffer (CIK).  Also check for
7735  * ring buffer overflow and deal with it.
7736  * Used by cik_irq_process().
7737  * Returns the value of the wptr.
7738  */
static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	/* prefer the writeback copy to avoid an MMIO read when available */
	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		wptr &= ~RB_OVERFLOW;
		/* When a ring buffer overflow happen start parsing interrupt
		 * from the last not overwritten vector (wptr + 16). Hopefully
		 * this should allow us to catchup.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		/* clear the overflow flag in the hw so it can be set again */
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	return (wptr & rdev->ih.ptr_mask);
}
7763
7764 /*        CIK IV Ring
7765  * Each IV ring entry is 128 bits:
7766  * [7:0]    - interrupt source id
7767  * [31:8]   - reserved
7768  * [59:32]  - interrupt source data
7769  * [63:60]  - reserved
7770  * [71:64]  - RINGID
7771  *            CP:
7772  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7773  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7774  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7775  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7776  *            PIPE_ID - ME0 0=3D
7777  *                    - ME1&2 compute dispatcher (4 pipes each)
7778  *            SDMA:
7779  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7780  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7781  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7782  * [79:72]  - VMID
7783  * [95:80]  - PASID
7784  * [127:96] - reserved
7785  */
7786 /**
7787  * cik_irq_process - interrupt handler
7788  *
7789  * @rdev: radeon_device pointer
7790  *
7791  * Interrupt hander (CIK).  Walk the IH ring,
7792  * ack interrupts and schedule work to handle
7793  * interrupt events.
7794  * Returns irq process return code.
7795  */
7796 int cik_irq_process(struct radeon_device *rdev)
7797 {
7798         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7799         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7800         u32 wptr;
7801         u32 rptr;
7802         u32 src_id, src_data, ring_id;
7803         u8 me_id, pipe_id, queue_id;
7804         u32 ring_index;
7805         bool queue_hotplug = false;
7806         bool queue_reset = false;
7807         u32 addr, status, mc_client;
7808         bool queue_thermal = false;
7809
7810         if (!rdev->ih.enabled || rdev->shutdown)
7811                 return IRQ_NONE;
7812
7813         wptr = cik_get_ih_wptr(rdev);
7814
7815 restart_ih:
7816         /* is somebody else already processing irqs? */
7817         if (atomic_xchg(&rdev->ih.lock, 1))
7818                 return IRQ_NONE;
7819
7820         rptr = rdev->ih.rptr;
7821         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7822
7823         /* Order reading of wptr vs. reading of IH ring data */
7824         rmb();
7825
7826         /* display interrupts */
7827         cik_irq_ack(rdev);
7828
7829         while (rptr != wptr) {
7830                 /* wptr/rptr are in bytes! */
7831                 ring_index = rptr / 4;
7832
7833                 radeon_kfd_interrupt(rdev,
7834                                 (const void *) &rdev->ih.ring[ring_index]);
7835
7836                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7837                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7838                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7839
7840                 switch (src_id) {
7841                 case 1: /* D1 vblank/vline */
7842                         switch (src_data) {
7843                         case 0: /* D1 vblank */
7844                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7845                                         if (rdev->irq.crtc_vblank_int[0]) {
7846                                                 drm_handle_vblank(rdev->ddev, 0);
7847                                                 rdev->pm.vblank_sync = true;
7848                                                 wake_up(&rdev->irq.vblank_queue);
7849                                         }
7850                                         if (atomic_read(&rdev->irq.pflip[0]))
7851                                                 radeon_crtc_handle_vblank(rdev, 0);
7852                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7853                                         DRM_DEBUG("IH: D1 vblank\n");
7854                                 }
7855                                 break;
7856                         case 1: /* D1 vline */
7857                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7858                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7859                                         DRM_DEBUG("IH: D1 vline\n");
7860                                 }
7861                                 break;
7862                         default:
7863                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7864                                 break;
7865                         }
7866                         break;
7867                 case 2: /* D2 vblank/vline */
7868                         switch (src_data) {
7869                         case 0: /* D2 vblank */
7870                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7871                                         if (rdev->irq.crtc_vblank_int[1]) {
7872                                                 drm_handle_vblank(rdev->ddev, 1);
7873                                                 rdev->pm.vblank_sync = true;
7874                                                 wake_up(&rdev->irq.vblank_queue);
7875                                         }
7876                                         if (atomic_read(&rdev->irq.pflip[1]))
7877                                                 radeon_crtc_handle_vblank(rdev, 1);
7878                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7879                                         DRM_DEBUG("IH: D2 vblank\n");
7880                                 }
7881                                 break;
7882                         case 1: /* D2 vline */
7883                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7884                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7885                                         DRM_DEBUG("IH: D2 vline\n");
7886                                 }
7887                                 break;
7888                         default:
7889                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7890                                 break;
7891                         }
7892                         break;
7893                 case 3: /* D3 vblank/vline */
7894                         switch (src_data) {
7895                         case 0: /* D3 vblank */
7896                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7897                                         if (rdev->irq.crtc_vblank_int[2]) {
7898                                                 drm_handle_vblank(rdev->ddev, 2);
7899                                                 rdev->pm.vblank_sync = true;
7900                                                 wake_up(&rdev->irq.vblank_queue);
7901                                         }
7902                                         if (atomic_read(&rdev->irq.pflip[2]))
7903                                                 radeon_crtc_handle_vblank(rdev, 2);
7904                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7905                                         DRM_DEBUG("IH: D3 vblank\n");
7906                                 }
7907                                 break;
7908                         case 1: /* D3 vline */
7909                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7910                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7911                                         DRM_DEBUG("IH: D3 vline\n");
7912                                 }
7913                                 break;
7914                         default:
7915                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7916                                 break;
7917                         }
7918                         break;
7919                 case 4: /* D4 vblank/vline */
7920                         switch (src_data) {
7921                         case 0: /* D4 vblank */
7922                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7923                                         if (rdev->irq.crtc_vblank_int[3]) {
7924                                                 drm_handle_vblank(rdev->ddev, 3);
7925                                                 rdev->pm.vblank_sync = true;
7926                                                 wake_up(&rdev->irq.vblank_queue);
7927                                         }
7928                                         if (atomic_read(&rdev->irq.pflip[3]))
7929                                                 radeon_crtc_handle_vblank(rdev, 3);
7930                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7931                                         DRM_DEBUG("IH: D4 vblank\n");
7932                                 }
7933                                 break;
7934                         case 1: /* D4 vline */
7935                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7936                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7937                                         DRM_DEBUG("IH: D4 vline\n");
7938                                 }
7939                                 break;
7940                         default:
7941                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7942                                 break;
7943                         }
7944                         break;
7945                 case 5: /* D5 vblank/vline */
7946                         switch (src_data) {
7947                         case 0: /* D5 vblank */
7948                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7949                                         if (rdev->irq.crtc_vblank_int[4]) {
7950                                                 drm_handle_vblank(rdev->ddev, 4);
7951                                                 rdev->pm.vblank_sync = true;
7952                                                 wake_up(&rdev->irq.vblank_queue);
7953                                         }
7954                                         if (atomic_read(&rdev->irq.pflip[4]))
7955                                                 radeon_crtc_handle_vblank(rdev, 4);
7956                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7957                                         DRM_DEBUG("IH: D5 vblank\n");
7958                                 }
7959                                 break;
7960                         case 1: /* D5 vline */
7961                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7962                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7963                                         DRM_DEBUG("IH: D5 vline\n");
7964                                 }
7965                                 break;
7966                         default:
7967                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7968                                 break;
7969                         }
7970                         break;
7971                 case 6: /* D6 vblank/vline */
7972                         switch (src_data) {
7973                         case 0: /* D6 vblank */
7974                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7975                                         if (rdev->irq.crtc_vblank_int[5]) {
7976                                                 drm_handle_vblank(rdev->ddev, 5);
7977                                                 rdev->pm.vblank_sync = true;
7978                                                 wake_up(&rdev->irq.vblank_queue);
7979                                         }
7980                                         if (atomic_read(&rdev->irq.pflip[5]))
7981                                                 radeon_crtc_handle_vblank(rdev, 5);
7982                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7983                                         DRM_DEBUG("IH: D6 vblank\n");
7984                                 }
7985                                 break;
7986                         case 1: /* D6 vline */
7987                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7988                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7989                                         DRM_DEBUG("IH: D6 vline\n");
7990                                 }
7991                                 break;
7992                         default:
7993                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7994                                 break;
7995                         }
7996                         break;
7997                 case 8: /* D1 page flip */
7998                 case 10: /* D2 page flip */
7999                 case 12: /* D3 page flip */
8000                 case 14: /* D4 page flip */
8001                 case 16: /* D5 page flip */
8002                 case 18: /* D6 page flip */
8003                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
8004                         if (radeon_use_pflipirq > 0)
8005                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
8006                         break;
8007                 case 42: /* HPD hotplug */
8008                         switch (src_data) {
8009                         case 0:
8010                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
8011                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8012                                         queue_hotplug = true;
8013                                         DRM_DEBUG("IH: HPD1\n");
8014                                 }
8015                                 break;
8016                         case 1:
8017                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
8018                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8019                                         queue_hotplug = true;
8020                                         DRM_DEBUG("IH: HPD2\n");
8021                                 }
8022                                 break;
8023                         case 2:
8024                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
8025                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8026                                         queue_hotplug = true;
8027                                         DRM_DEBUG("IH: HPD3\n");
8028                                 }
8029                                 break;
8030                         case 3:
8031                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
8032                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8033                                         queue_hotplug = true;
8034                                         DRM_DEBUG("IH: HPD4\n");
8035                                 }
8036                                 break;
8037                         case 4:
8038                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
8039                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8040                                         queue_hotplug = true;
8041                                         DRM_DEBUG("IH: HPD5\n");
8042                                 }
8043                                 break;
8044                         case 5:
8045                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
8046                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8047                                         queue_hotplug = true;
8048                                         DRM_DEBUG("IH: HPD6\n");
8049                                 }
8050                                 break;
8051                         default:
8052                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8053                                 break;
8054                         }
8055                         break;
8056                 case 96:
8057                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
8058                         WREG32(SRBM_INT_ACK, 0x1);
8059                         break;
8060                 case 124: /* UVD */
8061                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8062                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8063                         break;
8064                 case 146:
8065                 case 147:
8066                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8067                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8068                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8069                         /* reset addr and status */
8070                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8071                         if (addr == 0x0 && status == 0x0)
8072                                 break;
8073                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8074                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8075                                 addr);
8076                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8077                                 status);
8078                         cik_vm_decode_fault(rdev, status, addr, mc_client);
8079                         break;
8080                 case 167: /* VCE */
8081                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8082                         switch (src_data) {
8083                         case 0:
8084                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8085                                 break;
8086                         case 1:
8087                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8088                                 break;
8089                         default:
8090                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8091                                 break;
8092                         }
8093                         break;
8094                 case 176: /* GFX RB CP_INT */
8095                 case 177: /* GFX IB CP_INT */
8096                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8097                         break;
8098                 case 181: /* CP EOP event */
8099                         DRM_DEBUG("IH: CP EOP\n");
8100                         /* XXX check the bitfield order! */
8101                         me_id = (ring_id & 0x60) >> 5;
8102                         pipe_id = (ring_id & 0x18) >> 3;
8103                         queue_id = (ring_id & 0x7) >> 0;
8104                         switch (me_id) {
8105                         case 0:
8106                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8107                                 break;
8108                         case 1:
8109                         case 2:
8110                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
8111                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8112                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
8113                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8114                                 break;
8115                         }
8116                         break;
8117                 case 184: /* CP Privileged reg access */
8118                         DRM_ERROR("Illegal register access in command stream\n");
8119                         /* XXX check the bitfield order! */
8120                         me_id = (ring_id & 0x60) >> 5;
8121                         pipe_id = (ring_id & 0x18) >> 3;
8122                         queue_id = (ring_id & 0x7) >> 0;
8123                         switch (me_id) {
8124                         case 0:
8125                                 /* This results in a full GPU reset, but all we need to do is soft
8126                                  * reset the CP for gfx
8127                                  */
8128                                 queue_reset = true;
8129                                 break;
8130                         case 1:
8131                                 /* XXX compute */
8132                                 queue_reset = true;
8133                                 break;
8134                         case 2:
8135                                 /* XXX compute */
8136                                 queue_reset = true;
8137                                 break;
8138                         }
8139                         break;
8140                 case 185: /* CP Privileged inst */
8141                         DRM_ERROR("Illegal instruction in command stream\n");
8142                         /* XXX check the bitfield order! */
8143                         me_id = (ring_id & 0x60) >> 5;
8144                         pipe_id = (ring_id & 0x18) >> 3;
8145                         queue_id = (ring_id & 0x7) >> 0;
8146                         switch (me_id) {
8147                         case 0:
8148                                 /* This results in a full GPU reset, but all we need to do is soft
8149                                  * reset the CP for gfx
8150                                  */
8151                                 queue_reset = true;
8152                                 break;
8153                         case 1:
8154                                 /* XXX compute */
8155                                 queue_reset = true;
8156                                 break;
8157                         case 2:
8158                                 /* XXX compute */
8159                                 queue_reset = true;
8160                                 break;
8161                         }
8162                         break;
8163                 case 224: /* SDMA trap event */
8164                         /* XXX check the bitfield order! */
8165                         me_id = (ring_id & 0x3) >> 0;
8166                         queue_id = (ring_id & 0xc) >> 2;
8167                         DRM_DEBUG("IH: SDMA trap\n");
8168                         switch (me_id) {
8169                         case 0:
8170                                 switch (queue_id) {
8171                                 case 0:
8172                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8173                                         break;
8174                                 case 1:
8175                                         /* XXX compute */
8176                                         break;
8177                                 case 2:
8178                                         /* XXX compute */
8179                                         break;
8180                                 }
8181                                 break;
8182                         case 1:
8183                                 switch (queue_id) {
8184                                 case 0:
8185                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8186                                         break;
8187                                 case 1:
8188                                         /* XXX compute */
8189                                         break;
8190                                 case 2:
8191                                         /* XXX compute */
8192                                         break;
8193                                 }
8194                                 break;
8195                         }
8196                         break;
8197                 case 230: /* thermal low to high */
8198                         DRM_DEBUG("IH: thermal low to high\n");
8199                         rdev->pm.dpm.thermal.high_to_low = false;
8200                         queue_thermal = true;
8201                         break;
8202                 case 231: /* thermal high to low */
8203                         DRM_DEBUG("IH: thermal high to low\n");
8204                         rdev->pm.dpm.thermal.high_to_low = true;
8205                         queue_thermal = true;
8206                         break;
8207                 case 233: /* GUI IDLE */
8208                         DRM_DEBUG("IH: GUI idle\n");
8209                         break;
8210                 case 241: /* SDMA Privileged inst */
8211                 case 247: /* SDMA Privileged inst */
8212                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
8213                         /* XXX check the bitfield order! */
8214                         me_id = (ring_id & 0x3) >> 0;
8215                         queue_id = (ring_id & 0xc) >> 2;
8216                         switch (me_id) {
8217                         case 0:
8218                                 switch (queue_id) {
8219                                 case 0:
8220                                         queue_reset = true;
8221                                         break;
8222                                 case 1:
8223                                         /* XXX compute */
8224                                         queue_reset = true;
8225                                         break;
8226                                 case 2:
8227                                         /* XXX compute */
8228                                         queue_reset = true;
8229                                         break;
8230                                 }
8231                                 break;
8232                         case 1:
8233                                 switch (queue_id) {
8234                                 case 0:
8235                                         queue_reset = true;
8236                                         break;
8237                                 case 1:
8238                                         /* XXX compute */
8239                                         queue_reset = true;
8240                                         break;
8241                                 case 2:
8242                                         /* XXX compute */
8243                                         queue_reset = true;
8244                                         break;
8245                                 }
8246                                 break;
8247                         }
8248                         break;
8249                 default:
8250                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8251                         break;
8252                 }
8253
8254                 /* wptr/rptr are in bytes! */
8255                 rptr += 16;
8256                 rptr &= rdev->ih.ptr_mask;
8257                 WREG32(IH_RB_RPTR, rptr);
8258         }
8259         if (queue_hotplug)
8260                 schedule_work(&rdev->hotplug_work);
8261         if (queue_reset) {
8262                 rdev->needs_reset = true;
8263                 wake_up_all(&rdev->fence_queue);
8264         }
8265         if (queue_thermal)
8266                 schedule_work(&rdev->pm.dpm.thermal.work);
8267         rdev->ih.rptr = rptr;
8268         atomic_set(&rdev->ih.lock, 0);
8269
8270         /* make sure wptr hasn't changed while processing */
8271         wptr = cik_get_ih_wptr(rdev);
8272         if (wptr != rptr)
8273                 goto restart_ih;
8274
8275         return IRQ_HANDLED;
8276 }
8277
8278 /*
8279  * startup/shutdown callbacks
8280  */
8281 /**
8282  * cik_startup - program the asic to a functional state
8283  *
8284  * @rdev: radeon_device pointer
8285  *
8286  * Programs the asic to a functional state (CIK).
8287  * Called by cik_init() and cik_resume().
8288  * Returns 0 for success, error for failure.
8289  */
8290 static int cik_startup(struct radeon_device *rdev)
8291 {
8292         struct radeon_ring *ring;
8293         u32 nop;
8294         int r;
8295
8296         /* enable pcie gen2/3 link */
8297         cik_pcie_gen3_enable(rdev);
8298         /* enable aspm */
8299         cik_program_aspm(rdev);
8300
8301         /* scratch needs to be initialized before MC */
8302         r = r600_vram_scratch_init(rdev);
8303         if (r)
8304                 return r;
8305
8306         cik_mc_program(rdev);
8307
8308         if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8309                 r = ci_mc_load_microcode(rdev);
8310                 if (r) {
8311                         DRM_ERROR("Failed to load MC firmware!\n");
8312                         return r;
8313                 }
8314         }
8315
8316         r = cik_pcie_gart_enable(rdev);
8317         if (r)
8318                 return r;
8319         cik_gpu_init(rdev);
8320
8321         /* allocate rlc buffers */
8322         if (rdev->flags & RADEON_IS_IGP) {
8323                 if (rdev->family == CHIP_KAVERI) {
8324                         rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8325                         rdev->rlc.reg_list_size =
8326                                 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8327                 } else {
8328                         rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8329                         rdev->rlc.reg_list_size =
8330                                 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8331                 }
8332         }
8333         rdev->rlc.cs_data = ci_cs_data;
8334         rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8335         r = sumo_rlc_init(rdev);
8336         if (r) {
8337                 DRM_ERROR("Failed to init rlc BOs!\n");
8338                 return r;
8339         }
8340
8341         /* allocate wb buffer */
8342         r = radeon_wb_init(rdev);
8343         if (r)
8344                 return r;
8345
8346         /* allocate mec buffers */
8347         r = cik_mec_init(rdev);
8348         if (r) {
8349                 DRM_ERROR("Failed to init MEC BOs!\n");
8350                 return r;
8351         }
8352
8353         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8354         if (r) {
8355                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8356                 return r;
8357         }
8358
8359         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8360         if (r) {
8361                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8362                 return r;
8363         }
8364
8365         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8366         if (r) {
8367                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8368                 return r;
8369         }
8370
8371         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8372         if (r) {
8373                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8374                 return r;
8375         }
8376
8377         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8378         if (r) {
8379                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8380                 return r;
8381         }
8382
8383         r = radeon_uvd_resume(rdev);
8384         if (!r) {
8385                 r = uvd_v4_2_resume(rdev);
8386                 if (!r) {
8387                         r = radeon_fence_driver_start_ring(rdev,
8388                                                            R600_RING_TYPE_UVD_INDEX);
8389                         if (r)
8390                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8391                 }
8392         }
8393         if (r)
8394                 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8395
8396         r = radeon_vce_resume(rdev);
8397         if (!r) {
8398                 r = vce_v2_0_resume(rdev);
8399                 if (!r)
8400                         r = radeon_fence_driver_start_ring(rdev,
8401                                                            TN_RING_TYPE_VCE1_INDEX);
8402                 if (!r)
8403                         r = radeon_fence_driver_start_ring(rdev,
8404                                                            TN_RING_TYPE_VCE2_INDEX);
8405         }
8406         if (r) {
8407                 dev_err(rdev->dev, "VCE init error (%d).\n", r);
8408                 rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8409                 rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8410         }
8411
8412         /* Enable IRQ */
8413         if (!rdev->irq.installed) {
8414                 r = radeon_irq_kms_init(rdev);
8415                 if (r)
8416                         return r;
8417         }
8418
8419         r = cik_irq_init(rdev);
8420         if (r) {
8421                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
8422                 radeon_irq_kms_fini(rdev);
8423                 return r;
8424         }
8425         cik_irq_set(rdev);
8426
8427         if (rdev->family == CHIP_HAWAII) {
8428                 if (rdev->new_fw)
8429                         nop = PACKET3(PACKET3_NOP, 0x3FFF);
8430                 else
8431                         nop = RADEON_CP_PACKET2;
8432         } else {
8433                 nop = PACKET3(PACKET3_NOP, 0x3FFF);
8434         }
8435
8436         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8437         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8438                              nop);
8439         if (r)
8440                 return r;
8441
8442         /* set up the compute queues */
8443         /* type-2 packets are deprecated on MEC, use type-3 instead */
8444         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8445         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8446                              nop);
8447         if (r)
8448                 return r;
8449         ring->me = 1; /* first MEC */
8450         ring->pipe = 0; /* first pipe */
8451         ring->queue = 0; /* first queue */
8452         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8453
8454         /* type-2 packets are deprecated on MEC, use type-3 instead */
8455         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8456         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8457                              nop);
8458         if (r)
8459                 return r;
8460         /* dGPU only have 1 MEC */
8461         ring->me = 1; /* first MEC */
8462         ring->pipe = 0; /* first pipe */
8463         ring->queue = 1; /* second queue */
8464         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8465
8466         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8467         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8468                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8469         if (r)
8470                 return r;
8471
8472         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8473         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8474                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8475         if (r)
8476                 return r;
8477
8478         r = cik_cp_resume(rdev);
8479         if (r)
8480                 return r;
8481
8482         r = cik_sdma_resume(rdev);
8483         if (r)
8484                 return r;
8485
8486         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8487         if (ring->ring_size) {
8488                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8489                                      RADEON_CP_PACKET2);
8490                 if (!r)
8491                         r = uvd_v1_0_init(rdev);
8492                 if (r)
8493                         DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8494         }
8495
8496         r = -ENOENT;
8497
8498         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8499         if (ring->ring_size)
8500                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8501                                      VCE_CMD_NO_OP);
8502
8503         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8504         if (ring->ring_size)
8505                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8506                                      VCE_CMD_NO_OP);
8507
8508         if (!r)
8509                 r = vce_v1_0_init(rdev);
8510         else if (r != -ENOENT)
8511                 DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8512
8513         r = radeon_ib_pool_init(rdev);
8514         if (r) {
8515                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8516                 return r;
8517         }
8518
8519         r = radeon_vm_manager_init(rdev);
8520         if (r) {
8521                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8522                 return r;
8523         }
8524
8525         r = radeon_audio_init(rdev);
8526         if (r)
8527                 return r;
8528
8529         r = radeon_kfd_resume(rdev);
8530         if (r)
8531                 return r;
8532
8533         return 0;
8534 }
8535
8536 /**
8537  * cik_resume - resume the asic to a functional state
8538  *
8539  * @rdev: radeon_device pointer
8540  *
8541  * Programs the asic to a functional state (CIK).
8542  * Called at resume.
8543  * Returns 0 for success, error for failure.
8544  */
8545 int cik_resume(struct radeon_device *rdev)
8546 {
8547         int r;
8548
8549         /* post card */
8550         atom_asic_init(rdev->mode_info.atom_context);
8551
8552         /* init golden registers */
8553         cik_init_golden_registers(rdev);
8554
8555         if (rdev->pm.pm_method == PM_METHOD_DPM)
8556                 radeon_pm_resume(rdev);
8557
8558         rdev->accel_working = true;
8559         r = cik_startup(rdev);
8560         if (r) {
8561                 DRM_ERROR("cik startup failed on resume\n");
8562                 rdev->accel_working = false;
8563                 return r;
8564         }
8565
8566         return r;
8567
8568 }
8569
8570 /**
8571  * cik_suspend - suspend the asic
8572  *
8573  * @rdev: radeon_device pointer
8574  *
8575  * Bring the chip into a state suitable for suspend (CIK).
8576  * Called at suspend.
8577  * Returns 0 for success.
8578  */
int cik_suspend(struct radeon_device *rdev)
{
	/* quiesce KFD, power management, audio and the VM manager first */
	radeon_kfd_suspend(rdev);
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the graphics/compute CP and the SDMA engines */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	/* shut down the UVD and VCE blocks */
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	radeon_vce_suspend(rdev);
	/* disable powergating and clockgating */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	/* stop interrupts and writeback, then unbind the GART last */
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8597
/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does little more
 * than call the asic-specific functions. This should also allow
 * us to remove a bunch of callback functions like vram_info.
 */
8604 /**
8605  * cik_init - asic specific driver and hw init
8606  *
8607  * @rdev: radeon_device pointer
8608  *
8609  * Setup asic specific driver variables and program the hw
8610  * to a functional state (CIK).
8611  * Called at driver startup.
8612  * Returns 0 for success, errors for failure.
8613  */
8614 int cik_init(struct radeon_device *rdev)
8615 {
8616         struct radeon_ring *ring;
8617         int r;
8618
8619         /* Read BIOS */
8620         if (!radeon_get_bios(rdev)) {
8621                 if (ASIC_IS_AVIVO(rdev))
8622                         return -EINVAL;
8623         }
8624         /* Must be an ATOMBIOS */
8625         if (!rdev->is_atom_bios) {
8626                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8627                 return -EINVAL;
8628         }
8629         r = radeon_atombios_init(rdev);
8630         if (r)
8631                 return r;
8632
8633         /* Post card if necessary */
8634         if (!radeon_card_posted(rdev)) {
8635                 if (!rdev->bios) {
8636                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8637                         return -EINVAL;
8638                 }
8639                 DRM_INFO("GPU not posted. posting now...\n");
8640                 atom_asic_init(rdev->mode_info.atom_context);
8641         }
8642         /* init golden registers */
8643         cik_init_golden_registers(rdev);
8644         /* Initialize scratch registers */
8645         cik_scratch_init(rdev);
8646         /* Initialize surface registers */
8647         radeon_surface_init(rdev);
8648         /* Initialize clocks */
8649         radeon_get_clock_info(rdev->ddev);
8650
8651         /* Fence driver */
8652         r = radeon_fence_driver_init(rdev);
8653         if (r)
8654                 return r;
8655
8656         /* initialize memory controller */
8657         r = cik_mc_init(rdev);
8658         if (r)
8659                 return r;
8660         /* Memory manager */
8661         r = radeon_bo_init(rdev);
8662         if (r)
8663                 return r;
8664
8665         if (rdev->flags & RADEON_IS_IGP) {
8666                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8667                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8668                         r = cik_init_microcode(rdev);
8669                         if (r) {
8670                                 DRM_ERROR("Failed to load firmware!\n");
8671                                 return r;
8672                         }
8673                 }
8674         } else {
8675                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8676                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8677                     !rdev->mc_fw) {
8678                         r = cik_init_microcode(rdev);
8679                         if (r) {
8680                                 DRM_ERROR("Failed to load firmware!\n");
8681                                 return r;
8682                         }
8683                 }
8684         }
8685
8686         /* Initialize power management */
8687         radeon_pm_init(rdev);
8688
8689         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8690         ring->ring_obj = NULL;
8691         r600_ring_init(rdev, ring, 1024 * 1024);
8692
8693         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8694         ring->ring_obj = NULL;
8695         r600_ring_init(rdev, ring, 1024 * 1024);
8696         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8697         if (r)
8698                 return r;
8699
8700         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8701         ring->ring_obj = NULL;
8702         r600_ring_init(rdev, ring, 1024 * 1024);
8703         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8704         if (r)
8705                 return r;
8706
8707         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8708         ring->ring_obj = NULL;
8709         r600_ring_init(rdev, ring, 256 * 1024);
8710
8711         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8712         ring->ring_obj = NULL;
8713         r600_ring_init(rdev, ring, 256 * 1024);
8714
8715         r = radeon_uvd_init(rdev);
8716         if (!r) {
8717                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8718                 ring->ring_obj = NULL;
8719                 r600_ring_init(rdev, ring, 4096);
8720         }
8721
8722         r = radeon_vce_init(rdev);
8723         if (!r) {
8724                 ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8725                 ring->ring_obj = NULL;
8726                 r600_ring_init(rdev, ring, 4096);
8727
8728                 ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8729                 ring->ring_obj = NULL;
8730                 r600_ring_init(rdev, ring, 4096);
8731         }
8732
8733         rdev->ih.ring_obj = NULL;
8734         r600_ih_ring_init(rdev, 64 * 1024);
8735
8736         r = r600_pcie_gart_init(rdev);
8737         if (r)
8738                 return r;
8739
8740         rdev->accel_working = true;
8741         r = cik_startup(rdev);
8742         if (r) {
8743                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8744                 cik_cp_fini(rdev);
8745                 cik_sdma_fini(rdev);
8746                 cik_irq_fini(rdev);
8747                 sumo_rlc_fini(rdev);
8748                 cik_mec_fini(rdev);
8749                 radeon_wb_fini(rdev);
8750                 radeon_ib_pool_fini(rdev);
8751                 radeon_vm_manager_fini(rdev);
8752                 radeon_irq_kms_fini(rdev);
8753                 cik_pcie_gart_fini(rdev);
8754                 rdev->accel_working = false;
8755         }
8756
8757         /* Don't start up if the MC ucode is missing.
8758          * The default clocks and voltages before the MC ucode
8759          * is loaded are not suffient for advanced operations.
8760          */
8761         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8762                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8763                 return -EINVAL;
8764         }
8765
8766         return 0;
8767 }
8768
8769 /**
8770  * cik_fini - asic specific driver and hw fini
8771  *
8772  * @rdev: radeon_device pointer
8773  *
8774  * Tear down the asic specific driver variables and program the hw
8775  * to an idle state (CIK).
8776  * Called at driver unload.
8777  */
void cik_fini(struct radeon_device *rdev)
{
	/* tear down, roughly the reverse of cik_init()/cik_startup() */
	radeon_pm_fini(rdev);
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	/* clear the pointer so nothing can use the freed BIOS copy */
	rdev->bios = NULL;
}
8804
8805 void dce8_program_fmt(struct drm_encoder *encoder)
8806 {
8807         struct drm_device *dev = encoder->dev;
8808         struct radeon_device *rdev = dev->dev_private;
8809         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8810         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8811         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8812         int bpc = 0;
8813         u32 tmp = 0;
8814         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8815
8816         if (connector) {
8817                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8818                 bpc = radeon_get_monitor_bpc(connector);
8819                 dither = radeon_connector->dither;
8820         }
8821
8822         /* LVDS/eDP FMT is set up by atom */
8823         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8824                 return;
8825
8826         /* not needed for analog */
8827         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8828             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8829                 return;
8830
8831         if (bpc == 0)
8832                 return;
8833
8834         switch (bpc) {
8835         case 6:
8836                 if (dither == RADEON_FMT_DITHER_ENABLE)
8837                         /* XXX sort out optimal dither settings */
8838                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8839                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8840                 else
8841                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8842                 break;
8843         case 8:
8844                 if (dither == RADEON_FMT_DITHER_ENABLE)
8845                         /* XXX sort out optimal dither settings */
8846                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8847                                 FMT_RGB_RANDOM_ENABLE |
8848                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8849                 else
8850                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8851                 break;
8852         case 10:
8853                 if (dither == RADEON_FMT_DITHER_ENABLE)
8854                         /* XXX sort out optimal dither settings */
8855                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8856                                 FMT_RGB_RANDOM_ENABLE |
8857                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8858                 else
8859                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8860                 break;
8861         default:
8862                 /* not needed */
8863                 break;
8864         }
8865
8866         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8867 }
8868
8869 /* display watermark setup */
8870 /**
8871  * dce8_line_buffer_adjust - Set up the line buffer
8872  *
8873  * @rdev: radeon_device pointer
8874  * @radeon_crtc: the selected display controller
8875  * @mode: the current display mode on the selected display
8876  * controller
8877  *
8878  * Setup up the line buffer allocation for
8879  * the selected display controller (CIK).
8880  * Returns the line buffer size in pixels.
8881  */
8882 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8883                                    struct radeon_crtc *radeon_crtc,
8884                                    struct drm_display_mode *mode)
8885 {
8886         u32 tmp, buffer_alloc, i;
8887         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8888         /*
8889          * Line Buffer Setup
8890          * There are 6 line buffers, one for each display controllers.
8891          * There are 3 partitions per LB. Select the number of partitions
8892          * to enable based on the display width.  For display widths larger
8893          * than 4096, you need use to use 2 display controllers and combine
8894          * them using the stereo blender.
8895          */
8896         if (radeon_crtc->base.enabled && mode) {
8897                 if (mode->crtc_hdisplay < 1920) {
8898                         tmp = 1;
8899                         buffer_alloc = 2;
8900                 } else if (mode->crtc_hdisplay < 2560) {
8901                         tmp = 2;
8902                         buffer_alloc = 2;
8903                 } else if (mode->crtc_hdisplay < 4096) {
8904                         tmp = 0;
8905                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8906                 } else {
8907                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8908                         tmp = 0;
8909                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8910                 }
8911         } else {
8912                 tmp = 1;
8913                 buffer_alloc = 0;
8914         }
8915
8916         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8917                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8918
8919         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8920                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8921         for (i = 0; i < rdev->usec_timeout; i++) {
8922                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8923                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8924                         break;
8925                 udelay(1);
8926         }
8927
8928         if (radeon_crtc->base.enabled && mode) {
8929                 switch (tmp) {
8930                 case 0:
8931                 default:
8932                         return 4096 * 2;
8933                 case 1:
8934                         return 1920 * 2;
8935                 case 2:
8936                         return 2560 * 2;
8937                 }
8938         }
8939
8940         /* controller not enabled, so no lb used */
8941         return 0;
8942 }
8943
8944 /**
8945  * cik_get_number_of_dram_channels - get the number of dram channels
8946  *
8947  * @rdev: radeon_device pointer
8948  *
8949  * Look up the number of video ram channels (CIK).
8950  * Used for display watermark bandwidth calculations
8951  * Returns the number of dram channels
8952  */
8953 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8954 {
8955         u32 tmp = RREG32(MC_SHARED_CHMAP);
8956
8957         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8958         case 0:
8959         default:
8960                 return 1;
8961         case 1:
8962                 return 2;
8963         case 2:
8964                 return 4;
8965         case 3:
8966                 return 8;
8967         case 4:
8968                 return 3;
8969         case 5:
8970                 return 6;
8971         case 6:
8972                 return 10;
8973         case 7:
8974                 return 12;
8975         case 8:
8976                 return 16;
8977         }
8978 }
8979
/* inputs for the DCE8 display watermark calculations below */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8995
8996 /**
8997  * dce8_dram_bandwidth - get the dram bandwidth
8998  *
8999  * @wm: watermark calculation data
9000  *
9001  * Calculate the raw dram bandwidth (CIK).
9002  * Used for display watermark bandwidth calculations
9003  * Returns the dram bandwidth in MBytes/s
9004  */
9005 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9006 {
9007         /* Calculate raw DRAM Bandwidth */
9008         fixed20_12 dram_efficiency; /* 0.7 */
9009         fixed20_12 yclk, dram_channels, bandwidth;
9010         fixed20_12 a;
9011
9012         a.full = dfixed_const(1000);
9013         yclk.full = dfixed_const(wm->yclk);
9014         yclk.full = dfixed_div(yclk, a);
9015         dram_channels.full = dfixed_const(wm->dram_channels * 4);
9016         a.full = dfixed_const(10);
9017         dram_efficiency.full = dfixed_const(7);
9018         dram_efficiency.full = dfixed_div(dram_efficiency, a);
9019         bandwidth.full = dfixed_mul(dram_channels, yclk);
9020         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9021
9022         return dfixed_trunc(bandwidth);
9023 }
9024
9025 /**
9026  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9027  *
9028  * @wm: watermark calculation data
9029  *
9030  * Calculate the dram bandwidth used for display (CIK).
9031  * Used for display watermark bandwidth calculations
9032  * Returns the dram bandwidth for display in MBytes/s
9033  */
9034 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9035 {
9036         /* Calculate DRAM Bandwidth and the part allocated to display. */
9037         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9038         fixed20_12 yclk, dram_channels, bandwidth;
9039         fixed20_12 a;
9040
9041         a.full = dfixed_const(1000);
9042         yclk.full = dfixed_const(wm->yclk);
9043         yclk.full = dfixed_div(yclk, a);
9044         dram_channels.full = dfixed_const(wm->dram_channels * 4);
9045         a.full = dfixed_const(10);
9046         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
9047         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9048         bandwidth.full = dfixed_mul(dram_channels, yclk);
9049         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9050
9051         return dfixed_trunc(bandwidth);
9052 }
9053
9054 /**
9055  * dce8_data_return_bandwidth - get the data return bandwidth
9056  *
9057  * @wm: watermark calculation data
9058  *
9059  * Calculate the data return bandwidth used for display (CIK).
9060  * Used for display watermark bandwidth calculations
9061  * Returns the data return bandwidth in MBytes/s
9062  */
9063 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9064 {
9065         /* Calculate the display Data return Bandwidth */
9066         fixed20_12 return_efficiency; /* 0.8 */
9067         fixed20_12 sclk, bandwidth;
9068         fixed20_12 a;
9069
9070         a.full = dfixed_const(1000);
9071         sclk.full = dfixed_const(wm->sclk);
9072         sclk.full = dfixed_div(sclk, a);
9073         a.full = dfixed_const(10);
9074         return_efficiency.full = dfixed_const(8);
9075         return_efficiency.full = dfixed_div(return_efficiency, a);
9076         a.full = dfixed_const(32);
9077         bandwidth.full = dfixed_mul(a, sclk);
9078         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9079
9080         return dfixed_trunc(bandwidth);
9081 }
9082
9083 /**
9084  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9085  *
9086  * @wm: watermark calculation data
9087  *
9088  * Calculate the dmif bandwidth used for display (CIK).
9089  * Used for display watermark bandwidth calculations
9090  * Returns the dmif bandwidth in MBytes/s
9091  */
9092 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9093 {
9094         /* Calculate the DMIF Request Bandwidth */
9095         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9096         fixed20_12 disp_clk, bandwidth;
9097         fixed20_12 a, b;
9098
9099         a.full = dfixed_const(1000);
9100         disp_clk.full = dfixed_const(wm->disp_clk);
9101         disp_clk.full = dfixed_div(disp_clk, a);
9102         a.full = dfixed_const(32);
9103         b.full = dfixed_mul(a, disp_clk);
9104
9105         a.full = dfixed_const(10);
9106         disp_clk_request_efficiency.full = dfixed_const(8);
9107         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9108
9109         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9110
9111         return dfixed_trunc(bandwidth);
9112 }
9113
9114 /**
9115  * dce8_available_bandwidth - get the min available bandwidth
9116  *
9117  * @wm: watermark calculation data
9118  *
9119  * Calculate the min available bandwidth used for display (CIK).
9120  * Used for display watermark bandwidth calculations
9121  * Returns the min available bandwidth in MBytes/s
9122  */
9123 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9124 {
9125         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9126         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9127         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9128         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9129
9130         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9131 }
9132
9133 /**
9134  * dce8_average_bandwidth - get the average available bandwidth
9135  *
9136  * @wm: watermark calculation data
9137  *
9138  * Calculate the average available bandwidth used for display (CIK).
9139  * Used for display watermark bandwidth calculations
9140  * Returns the average available bandwidth in MBytes/s
9141  */
9142 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9143 {
9144         /* Calculate the display mode Average Bandwidth
9145          * DisplayMode should contain the source and destination dimensions,
9146          * timing, etc.
9147          */
9148         fixed20_12 bpp;
9149         fixed20_12 line_time;
9150         fixed20_12 src_width;
9151         fixed20_12 bandwidth;
9152         fixed20_12 a;
9153
9154         a.full = dfixed_const(1000);
9155         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9156         line_time.full = dfixed_div(line_time, a);
9157         bpp.full = dfixed_const(wm->bytes_per_pixel);
9158         src_width.full = dfixed_const(wm->src_width);
9159         bandwidth.full = dfixed_mul(src_width, bpp);
9160         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9161         bandwidth.full = dfixed_div(bandwidth, line_time);
9162
9163         return dfixed_trunc(bandwidth);
9164 }
9165
9166 /**
9167  * dce8_latency_watermark - get the latency watermark
9168  *
9169  * @wm: watermark calculation data
9170  *
9171  * Calculate the latency watermark (CIK).
9172  * Used for display watermark bandwidth calculations
9173  * Returns the latency watermark in ns
9174  */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* time the other heads spend returning their data ahead of this one */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	/* no active heads -> no watermark needed */
	if (wm->num_heads == 0)
		return 0;

	/* heavy vertical scaling or interlacing needs 4 source lines per
	 * destination line; otherwise 2 are enough */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = bandwidth the DMIF buffer can sustain over the MC latency */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = line buffer fill limit: disp_clk(MHz) * bytes_per_pixel */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill one destination line's worth of source pixels */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* pad the latency if filling a line takes longer than displaying it */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
9237
9238 /**
9239  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9240  * average and available dram bandwidth
9241  *
9242  * @wm: watermark calculation data
9243  *
9244  * Check if the display average bandwidth fits in the display
9245  * dram bandwidth (CIK).
9246  * Used for display watermark bandwidth calculations
9247  * Returns true if the display fits, false if not.
9248  */
9249 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9250 {
9251         if (dce8_average_bandwidth(wm) <=
9252             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9253                 return true;
9254         else
9255                 return false;
9256 }
9257
9258 /**
9259  * dce8_average_bandwidth_vs_available_bandwidth - check
9260  * average and available bandwidth
9261  *
9262  * @wm: watermark calculation data
9263  *
9264  * Check if the display average bandwidth fits in the display
9265  * available bandwidth (CIK).
9266  * Used for display watermark bandwidth calculations
9267  * Returns true if the display fits, false if not.
9268  */
9269 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9270 {
9271         if (dce8_average_bandwidth(wm) <=
9272             (dce8_available_bandwidth(wm) / wm->num_heads))
9273                 return true;
9274         else
9275                 return false;
9276 }
9277
9278 /**
9279  * dce8_check_latency_hiding - check latency hiding
9280  *
9281  * @wm: watermark calculation data
9282  *
9283  * Check latency hiding (CIK).
9284  * Used for display watermark bandwidth calculations
9285  * Returns true if the display fits, false if not.
9286  */
9287 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9288 {
9289         u32 lb_partitions = wm->lb_size / wm->src_width;
9290         u32 line_time = wm->active_time + wm->blank_time;
9291         u32 latency_tolerant_lines;
9292         u32 latency_hiding;
9293         fixed20_12 a;
9294
9295         a.full = dfixed_const(1);
9296         if (wm->vsc.full > a.full)
9297                 latency_tolerant_lines = 1;
9298         else {
9299                 if (lb_partitions <= (wm->vtaps + 1))
9300                         latency_tolerant_lines = 1;
9301                 else
9302                         latency_tolerant_lines = 2;
9303         }
9304
9305         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9306
9307         if (dce8_latency_watermark(wm) <= latency_hiding)
9308                 return true;
9309         else
9310                 return false;
9311 }
9312
9313 /**
9314  * dce8_program_watermarks - program display watermarks
9315  *
9316  * @rdev: radeon_device pointer
9317  * @radeon_crtc: the selected display controller
9318  * @lb_size: line buffer size
9319  * @num_heads: number of display controllers in use
9320  *
9321  * Calculate and program the display watermarks for the
9322  * selected display controller (CIK).
9323  */
9324 static void dce8_program_watermarks(struct radeon_device *rdev,
9325                                     struct radeon_crtc *radeon_crtc,
9326                                     u32 lb_size, u32 num_heads)
9327 {
9328         struct drm_display_mode *mode = &radeon_crtc->base.mode;
9329         struct dce8_wm_params wm_low, wm_high;
9330         u32 pixel_period;
9331         u32 line_time = 0;
9332         u32 latency_watermark_a = 0, latency_watermark_b = 0;
9333         u32 tmp, wm_mask;
9334
9335         if (radeon_crtc->base.enabled && num_heads && mode) {
9336                 pixel_period = 1000000 / (u32)mode->clock;
9337                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9338
9339                 /* watermark for high clocks */
9340                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9341                     rdev->pm.dpm_enabled) {
9342                         wm_high.yclk =
9343                                 radeon_dpm_get_mclk(rdev, false) * 10;
9344                         wm_high.sclk =
9345                                 radeon_dpm_get_sclk(rdev, false) * 10;
9346                 } else {
9347                         wm_high.yclk = rdev->pm.current_mclk * 10;
9348                         wm_high.sclk = rdev->pm.current_sclk * 10;
9349                 }
9350
9351                 wm_high.disp_clk = mode->clock;
9352                 wm_high.src_width = mode->crtc_hdisplay;
9353                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9354                 wm_high.blank_time = line_time - wm_high.active_time;
9355                 wm_high.interlaced = false;
9356                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9357                         wm_high.interlaced = true;
9358                 wm_high.vsc = radeon_crtc->vsc;
9359                 wm_high.vtaps = 1;
9360                 if (radeon_crtc->rmx_type != RMX_OFF)
9361                         wm_high.vtaps = 2;
9362                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9363                 wm_high.lb_size = lb_size;
9364                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9365                 wm_high.num_heads = num_heads;
9366
9367                 /* set for high clocks */
9368                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9369
9370                 /* possibly force display priority to high */
9371                 /* should really do this at mode validation time... */
9372                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9373                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9374                     !dce8_check_latency_hiding(&wm_high) ||
9375                     (rdev->disp_priority == 2)) {
9376                         DRM_DEBUG_KMS("force priority to high\n");
9377                 }
9378
9379                 /* watermark for low clocks */
9380                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9381                     rdev->pm.dpm_enabled) {
9382                         wm_low.yclk =
9383                                 radeon_dpm_get_mclk(rdev, true) * 10;
9384                         wm_low.sclk =
9385                                 radeon_dpm_get_sclk(rdev, true) * 10;
9386                 } else {
9387                         wm_low.yclk = rdev->pm.current_mclk * 10;
9388                         wm_low.sclk = rdev->pm.current_sclk * 10;
9389                 }
9390
9391                 wm_low.disp_clk = mode->clock;
9392                 wm_low.src_width = mode->crtc_hdisplay;
9393                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9394                 wm_low.blank_time = line_time - wm_low.active_time;
9395                 wm_low.interlaced = false;
9396                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9397                         wm_low.interlaced = true;
9398                 wm_low.vsc = radeon_crtc->vsc;
9399                 wm_low.vtaps = 1;
9400                 if (radeon_crtc->rmx_type != RMX_OFF)
9401                         wm_low.vtaps = 2;
9402                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9403                 wm_low.lb_size = lb_size;
9404                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9405                 wm_low.num_heads = num_heads;
9406
9407                 /* set for low clocks */
9408                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9409
9410                 /* possibly force display priority to high */
9411                 /* should really do this at mode validation time... */
9412                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9413                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9414                     !dce8_check_latency_hiding(&wm_low) ||
9415                     (rdev->disp_priority == 2)) {
9416                         DRM_DEBUG_KMS("force priority to high\n");
9417                 }
9418         }
9419
9420         /* select wm A */
9421         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9422         tmp = wm_mask;
9423         tmp &= ~LATENCY_WATERMARK_MASK(3);
9424         tmp |= LATENCY_WATERMARK_MASK(1);
9425         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9426         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9427                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9428                 LATENCY_HIGH_WATERMARK(line_time)));
9429         /* select wm B */
9430         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9431         tmp &= ~LATENCY_WATERMARK_MASK(3);
9432         tmp |= LATENCY_WATERMARK_MASK(2);
9433         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9434         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9435                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9436                 LATENCY_HIGH_WATERMARK(line_time)));
9437         /* restore original selection */
9438         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9439
9440         /* save values for DPM */
9441         radeon_crtc->line_time = line_time;
9442         radeon_crtc->wm_high = latency_watermark_a;
9443         radeon_crtc->wm_low = latency_watermark_b;
9444 }
9445
9446 /**
9447  * dce8_bandwidth_update - program display watermarks
9448  *
9449  * @rdev: radeon_device pointer
9450  *
9451  * Calculate and program the display watermarks and line
9452  * buffer allocation (CIK).
9453  */
9454 void dce8_bandwidth_update(struct radeon_device *rdev)
9455 {
9456         struct drm_display_mode *mode = NULL;
9457         u32 num_heads = 0, lb_size;
9458         int i;
9459
9460         if (!rdev->mode_info.mode_config_initialized)
9461                 return;
9462
9463         radeon_update_display_priority(rdev);
9464
9465         for (i = 0; i < rdev->num_crtc; i++) {
9466                 if (rdev->mode_info.crtcs[i]->base.enabled)
9467                         num_heads++;
9468         }
9469         for (i = 0; i < rdev->num_crtc; i++) {
9470                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9471                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9472                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9473         }
9474 }
9475
9476 /**
9477  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9478  *
9479  * @rdev: radeon_device pointer
9480  *
9481  * Fetches a GPU clock counter snapshot (SI).
9482  * Returns the 64 bit clock counter snapshot.
9483  */
9484 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9485 {
9486         uint64_t clock;
9487
9488         mutex_lock(&rdev->gpu_clock_mutex);
9489         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9490         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9491                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9492         mutex_unlock(&rdev->gpu_clock_mutex);
9493         return clock;
9494 }
9495
9496 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9497                               u32 cntl_reg, u32 status_reg)
9498 {
9499         int r, i;
9500         struct atom_clock_dividers dividers;
9501         uint32_t tmp;
9502
9503         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9504                                            clock, false, &dividers);
9505         if (r)
9506                 return r;
9507
9508         tmp = RREG32_SMC(cntl_reg);
9509         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9510         tmp |= dividers.post_divider;
9511         WREG32_SMC(cntl_reg, tmp);
9512
9513         for (i = 0; i < 100; i++) {
9514                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9515                         break;
9516                 mdelay(10);
9517         }
9518         if (i == 100)
9519                 return -ETIMEDOUT;
9520
9521         return 0;
9522 }
9523
9524 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9525 {
9526         int r = 0;
9527
9528         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9529         if (r)
9530                 return r;
9531
9532         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9533         return r;
9534 }
9535
/**
 * cik_set_vce_clocks - set the VCE clocks
 *
 * @rdev: radeon_device pointer
 * @evclk: target evclk
 * @ecclk: target ecclk
 *
 * Programs the VCE ecclk divider and waits for the clock to settle
 * both before and after the change.
 * NOTE(review): @evclk is accepted but never used here — only ecclk is
 * programmed; confirm whether evclk programming is handled elsewhere.
 * Returns 0 on success, -ETIMEDOUT if the clock never reports stable,
 * or the error from the atom divider lookup.
 */
int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
{
	int r, i;
	struct atom_clock_dividers dividers;
	u32 tmp;

	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   ecclk, false, &dividers);
	if (r)
		return r;

	/* wait (up to ~1s) for the current eclk to be stable before touching it */
	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	/* install the new post divider, clearing the old divider field */
	tmp = RREG32_SMC(CG_ECLK_CNTL);
	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(CG_ECLK_CNTL, tmp);

	/* wait for the reprogrammed eclk to report stable again */
	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}
9570
/**
 * cik_pcie_gen3_enable - attempt to raise the PCIe link speed
 *
 * @rdev: radeon_device pointer
 *
 * If the platform and user settings allow it, retrain the PCIe link
 * to gen2 or gen3 speeds: optionally redo link equalization for gen3,
 * set the target link speed in the GPU's Link Control 2 register, and
 * trigger a software speed change via PCIE_LC_SPEED_CNTL.
 * Silently returns if any precondition fails (root bus, IGP, module
 * parameter, missing PCIe caps, or speed already enabled).
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* no upstream bridge to negotiate with */
	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	/* user disabled speed changes via radeon.pcie_gen2=0 */
	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing faster than gen1 supported: nothing to do */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* need the PCIe capability offsets of both link partners */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save LNKCTL on both ends; HAWD bits are restored below */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate to the full detected link width first */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* quiesce the link and redo equalization, up to 10 attempts */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				/* restore the saved HAWD bits on both ends */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				/* restore enter-compliance (bit 4) and transmit margin (bits 9-11) */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed into the GPU's Link Control 2 */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the software-initiated speed change */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hardware to clear the initiate bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9730
/**
 * cik_program_aspm - configure PCIe ASPM (active state power management)
 *
 * @rdev: radeon_device pointer
 *
 * Programs the PCIe link power-saving features: L0s/L1 inactivity
 * timers, PLL power-down in L1, dynamic lane power states, and the
 * clock-request (CLKREQ#) based clocking when the upstream bridge
 * advertises PCI_EXP_LNKCAP_CLKPM. Skipped entirely for IGPs, non-PCIE
 * parts, or when the radeon.aspm module parameter is 0.
 * Every register write is guarded by an orig != data check to avoid
 * redundant writes.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* local policy knobs; all features currently left enabled */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* user disabled ASPM via the radeon.aspm module parameter */
	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the transmitted N_FTS (fast training sequence) count */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* set L0s/L1 inactivity timers according to the policy flags above */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PLLs to power down in the L1/TXS2 link states
			 * on both PIF blocks */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* CLKREQ# clocking requires an upstream bridge that
			 * advertises clock power management */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* switch the thermal monitor clocks */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: write back the LC_CNTL value computed above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable memory light sleep for the BIF slave/master/replay memories */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* if the N_FTS field saturated and the link is symmetric,
		 * drop the L0s inactivity timer again */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}