Merge remote-tracking branches 'asoc/fix/fsl-ssi', 'asoc/fix/intel', 'asoc/fix/intel...
[sfrench/cifs-2.6.git] / drivers / gpu / drm / radeon / cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 #include "radeon_kfd.h"
37
38 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
45 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
46 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
47
48 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
49 MODULE_FIRMWARE("radeon/bonaire_me.bin");
50 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
51 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
52 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
53 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
54 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
55 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
56
57 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
58 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
59 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
60 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
61 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
64 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
65 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
66
67 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
68 MODULE_FIRMWARE("radeon/hawaii_me.bin");
69 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
70 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
71 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
72 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
73 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
74 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
75
76 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
77 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
78 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
79 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
80 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
81 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
82
83 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
84 MODULE_FIRMWARE("radeon/kaveri_me.bin");
85 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
86 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
87 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
88 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
89 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
90
91 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
92 MODULE_FIRMWARE("radeon/KABINI_me.bin");
93 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
94 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
95 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
96 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
97
98 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
99 MODULE_FIRMWARE("radeon/kabini_me.bin");
100 MODULE_FIRMWARE("radeon/kabini_ce.bin");
101 MODULE_FIRMWARE("radeon/kabini_mec.bin");
102 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
103 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
104
105 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
106 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
107 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
108 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
109 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
110 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
111
112 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
113 MODULE_FIRMWARE("radeon/mullins_me.bin");
114 MODULE_FIRMWARE("radeon/mullins_ce.bin");
115 MODULE_FIRMWARE("radeon/mullins_mec.bin");
116 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
117 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
118
119 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
120 extern void r600_ih_ring_fini(struct radeon_device *rdev);
121 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
122 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
123 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
124 extern void sumo_rlc_fini(struct radeon_device *rdev);
125 extern int sumo_rlc_init(struct radeon_device *rdev);
126 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
127 extern void si_rlc_reset(struct radeon_device *rdev);
128 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
129 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
130 extern int cik_sdma_resume(struct radeon_device *rdev);
131 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
132 extern void cik_sdma_fini(struct radeon_device *rdev);
133 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
134 static void cik_rlc_stop(struct radeon_device *rdev);
135 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
136 static void cik_program_aspm(struct radeon_device *rdev);
137 static void cik_init_pg(struct radeon_device *rdev);
138 static void cik_init_cg(struct radeon_device *rdev);
139 static void cik_fini_pg(struct radeon_device *rdev);
140 static void cik_fini_cg(struct radeon_device *rdev);
141 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
142                                           bool enable);
143
144 /**
145  * cik_get_allowed_info_register - fetch the register for the info ioctl
146  *
147  * @rdev: radeon_device pointer
148  * @reg: register offset in bytes
149  * @val: register value
150  *
151  * Returns 0 for success or -EINVAL for an invalid register
152  *
153  */
154 int cik_get_allowed_info_register(struct radeon_device *rdev,
155                                   u32 reg, u32 *val)
156 {
157         switch (reg) {
158         case GRBM_STATUS:
159         case GRBM_STATUS2:
160         case GRBM_STATUS_SE0:
161         case GRBM_STATUS_SE1:
162         case GRBM_STATUS_SE2:
163         case GRBM_STATUS_SE3:
164         case SRBM_STATUS:
165         case SRBM_STATUS2:
166         case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
167         case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
168         case UVD_STATUS:
169         /* TODO VCE */
170                 *val = RREG32(reg);
171                 return 0;
172         default:
173                 return -EINVAL;
174         }
175 }
176
177 /*
178  * Indirect registers accessor
179  */
180 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
181 {
182         unsigned long flags;
183         u32 r;
184
185         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
186         WREG32(CIK_DIDT_IND_INDEX, (reg));
187         r = RREG32(CIK_DIDT_IND_DATA);
188         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
189         return r;
190 }
191
192 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
193 {
194         unsigned long flags;
195
196         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
197         WREG32(CIK_DIDT_IND_INDEX, (reg));
198         WREG32(CIK_DIDT_IND_DATA, (v));
199         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
200 }
201
202 /* get temperature in millidegrees */
203 int ci_get_temp(struct radeon_device *rdev)
204 {
205         u32 temp;
206         int actual_temp = 0;
207
208         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
209                 CTF_TEMP_SHIFT;
210
211         if (temp & 0x200)
212                 actual_temp = 255;
213         else
214                 actual_temp = temp & 0x1ff;
215
216         actual_temp = actual_temp * 1000;
217
218         return actual_temp;
219 }
220
221 /* get temperature in millidegrees */
222 int kv_get_temp(struct radeon_device *rdev)
223 {
224         u32 temp;
225         int actual_temp = 0;
226
227         temp = RREG32_SMC(0xC0300E0C);
228
229         if (temp)
230                 actual_temp = (temp / 8) - 49;
231         else
232                 actual_temp = 0;
233
234         actual_temp = actual_temp * 1000;
235
236         return actual_temp;
237 }
238
239 /*
240  * Indirect registers accessor
241  */
242 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
243 {
244         unsigned long flags;
245         u32 r;
246
247         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
248         WREG32(PCIE_INDEX, reg);
249         (void)RREG32(PCIE_INDEX);
250         r = RREG32(PCIE_DATA);
251         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
252         return r;
253 }
254
255 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
256 {
257         unsigned long flags;
258
259         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
260         WREG32(PCIE_INDEX, reg);
261         (void)RREG32(PCIE_INDEX);
262         WREG32(PCIE_DATA, v);
263         (void)RREG32(PCIE_DATA);
264         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
265 }
266
267 static const u32 spectre_rlc_save_restore_register_list[] =
268 {
269         (0x0e00 << 16) | (0xc12c >> 2),
270         0x00000000,
271         (0x0e00 << 16) | (0xc140 >> 2),
272         0x00000000,
273         (0x0e00 << 16) | (0xc150 >> 2),
274         0x00000000,
275         (0x0e00 << 16) | (0xc15c >> 2),
276         0x00000000,
277         (0x0e00 << 16) | (0xc168 >> 2),
278         0x00000000,
279         (0x0e00 << 16) | (0xc170 >> 2),
280         0x00000000,
281         (0x0e00 << 16) | (0xc178 >> 2),
282         0x00000000,
283         (0x0e00 << 16) | (0xc204 >> 2),
284         0x00000000,
285         (0x0e00 << 16) | (0xc2b4 >> 2),
286         0x00000000,
287         (0x0e00 << 16) | (0xc2b8 >> 2),
288         0x00000000,
289         (0x0e00 << 16) | (0xc2bc >> 2),
290         0x00000000,
291         (0x0e00 << 16) | (0xc2c0 >> 2),
292         0x00000000,
293         (0x0e00 << 16) | (0x8228 >> 2),
294         0x00000000,
295         (0x0e00 << 16) | (0x829c >> 2),
296         0x00000000,
297         (0x0e00 << 16) | (0x869c >> 2),
298         0x00000000,
299         (0x0600 << 16) | (0x98f4 >> 2),
300         0x00000000,
301         (0x0e00 << 16) | (0x98f8 >> 2),
302         0x00000000,
303         (0x0e00 << 16) | (0x9900 >> 2),
304         0x00000000,
305         (0x0e00 << 16) | (0xc260 >> 2),
306         0x00000000,
307         (0x0e00 << 16) | (0x90e8 >> 2),
308         0x00000000,
309         (0x0e00 << 16) | (0x3c000 >> 2),
310         0x00000000,
311         (0x0e00 << 16) | (0x3c00c >> 2),
312         0x00000000,
313         (0x0e00 << 16) | (0x8c1c >> 2),
314         0x00000000,
315         (0x0e00 << 16) | (0x9700 >> 2),
316         0x00000000,
317         (0x0e00 << 16) | (0xcd20 >> 2),
318         0x00000000,
319         (0x4e00 << 16) | (0xcd20 >> 2),
320         0x00000000,
321         (0x5e00 << 16) | (0xcd20 >> 2),
322         0x00000000,
323         (0x6e00 << 16) | (0xcd20 >> 2),
324         0x00000000,
325         (0x7e00 << 16) | (0xcd20 >> 2),
326         0x00000000,
327         (0x8e00 << 16) | (0xcd20 >> 2),
328         0x00000000,
329         (0x9e00 << 16) | (0xcd20 >> 2),
330         0x00000000,
331         (0xae00 << 16) | (0xcd20 >> 2),
332         0x00000000,
333         (0xbe00 << 16) | (0xcd20 >> 2),
334         0x00000000,
335         (0x0e00 << 16) | (0x89bc >> 2),
336         0x00000000,
337         (0x0e00 << 16) | (0x8900 >> 2),
338         0x00000000,
339         0x3,
340         (0x0e00 << 16) | (0xc130 >> 2),
341         0x00000000,
342         (0x0e00 << 16) | (0xc134 >> 2),
343         0x00000000,
344         (0x0e00 << 16) | (0xc1fc >> 2),
345         0x00000000,
346         (0x0e00 << 16) | (0xc208 >> 2),
347         0x00000000,
348         (0x0e00 << 16) | (0xc264 >> 2),
349         0x00000000,
350         (0x0e00 << 16) | (0xc268 >> 2),
351         0x00000000,
352         (0x0e00 << 16) | (0xc26c >> 2),
353         0x00000000,
354         (0x0e00 << 16) | (0xc270 >> 2),
355         0x00000000,
356         (0x0e00 << 16) | (0xc274 >> 2),
357         0x00000000,
358         (0x0e00 << 16) | (0xc278 >> 2),
359         0x00000000,
360         (0x0e00 << 16) | (0xc27c >> 2),
361         0x00000000,
362         (0x0e00 << 16) | (0xc280 >> 2),
363         0x00000000,
364         (0x0e00 << 16) | (0xc284 >> 2),
365         0x00000000,
366         (0x0e00 << 16) | (0xc288 >> 2),
367         0x00000000,
368         (0x0e00 << 16) | (0xc28c >> 2),
369         0x00000000,
370         (0x0e00 << 16) | (0xc290 >> 2),
371         0x00000000,
372         (0x0e00 << 16) | (0xc294 >> 2),
373         0x00000000,
374         (0x0e00 << 16) | (0xc298 >> 2),
375         0x00000000,
376         (0x0e00 << 16) | (0xc29c >> 2),
377         0x00000000,
378         (0x0e00 << 16) | (0xc2a0 >> 2),
379         0x00000000,
380         (0x0e00 << 16) | (0xc2a4 >> 2),
381         0x00000000,
382         (0x0e00 << 16) | (0xc2a8 >> 2),
383         0x00000000,
384         (0x0e00 << 16) | (0xc2ac  >> 2),
385         0x00000000,
386         (0x0e00 << 16) | (0xc2b0 >> 2),
387         0x00000000,
388         (0x0e00 << 16) | (0x301d0 >> 2),
389         0x00000000,
390         (0x0e00 << 16) | (0x30238 >> 2),
391         0x00000000,
392         (0x0e00 << 16) | (0x30250 >> 2),
393         0x00000000,
394         (0x0e00 << 16) | (0x30254 >> 2),
395         0x00000000,
396         (0x0e00 << 16) | (0x30258 >> 2),
397         0x00000000,
398         (0x0e00 << 16) | (0x3025c >> 2),
399         0x00000000,
400         (0x4e00 << 16) | (0xc900 >> 2),
401         0x00000000,
402         (0x5e00 << 16) | (0xc900 >> 2),
403         0x00000000,
404         (0x6e00 << 16) | (0xc900 >> 2),
405         0x00000000,
406         (0x7e00 << 16) | (0xc900 >> 2),
407         0x00000000,
408         (0x8e00 << 16) | (0xc900 >> 2),
409         0x00000000,
410         (0x9e00 << 16) | (0xc900 >> 2),
411         0x00000000,
412         (0xae00 << 16) | (0xc900 >> 2),
413         0x00000000,
414         (0xbe00 << 16) | (0xc900 >> 2),
415         0x00000000,
416         (0x4e00 << 16) | (0xc904 >> 2),
417         0x00000000,
418         (0x5e00 << 16) | (0xc904 >> 2),
419         0x00000000,
420         (0x6e00 << 16) | (0xc904 >> 2),
421         0x00000000,
422         (0x7e00 << 16) | (0xc904 >> 2),
423         0x00000000,
424         (0x8e00 << 16) | (0xc904 >> 2),
425         0x00000000,
426         (0x9e00 << 16) | (0xc904 >> 2),
427         0x00000000,
428         (0xae00 << 16) | (0xc904 >> 2),
429         0x00000000,
430         (0xbe00 << 16) | (0xc904 >> 2),
431         0x00000000,
432         (0x4e00 << 16) | (0xc908 >> 2),
433         0x00000000,
434         (0x5e00 << 16) | (0xc908 >> 2),
435         0x00000000,
436         (0x6e00 << 16) | (0xc908 >> 2),
437         0x00000000,
438         (0x7e00 << 16) | (0xc908 >> 2),
439         0x00000000,
440         (0x8e00 << 16) | (0xc908 >> 2),
441         0x00000000,
442         (0x9e00 << 16) | (0xc908 >> 2),
443         0x00000000,
444         (0xae00 << 16) | (0xc908 >> 2),
445         0x00000000,
446         (0xbe00 << 16) | (0xc908 >> 2),
447         0x00000000,
448         (0x4e00 << 16) | (0xc90c >> 2),
449         0x00000000,
450         (0x5e00 << 16) | (0xc90c >> 2),
451         0x00000000,
452         (0x6e00 << 16) | (0xc90c >> 2),
453         0x00000000,
454         (0x7e00 << 16) | (0xc90c >> 2),
455         0x00000000,
456         (0x8e00 << 16) | (0xc90c >> 2),
457         0x00000000,
458         (0x9e00 << 16) | (0xc90c >> 2),
459         0x00000000,
460         (0xae00 << 16) | (0xc90c >> 2),
461         0x00000000,
462         (0xbe00 << 16) | (0xc90c >> 2),
463         0x00000000,
464         (0x4e00 << 16) | (0xc910 >> 2),
465         0x00000000,
466         (0x5e00 << 16) | (0xc910 >> 2),
467         0x00000000,
468         (0x6e00 << 16) | (0xc910 >> 2),
469         0x00000000,
470         (0x7e00 << 16) | (0xc910 >> 2),
471         0x00000000,
472         (0x8e00 << 16) | (0xc910 >> 2),
473         0x00000000,
474         (0x9e00 << 16) | (0xc910 >> 2),
475         0x00000000,
476         (0xae00 << 16) | (0xc910 >> 2),
477         0x00000000,
478         (0xbe00 << 16) | (0xc910 >> 2),
479         0x00000000,
480         (0x0e00 << 16) | (0xc99c >> 2),
481         0x00000000,
482         (0x0e00 << 16) | (0x9834 >> 2),
483         0x00000000,
484         (0x0000 << 16) | (0x30f00 >> 2),
485         0x00000000,
486         (0x0001 << 16) | (0x30f00 >> 2),
487         0x00000000,
488         (0x0000 << 16) | (0x30f04 >> 2),
489         0x00000000,
490         (0x0001 << 16) | (0x30f04 >> 2),
491         0x00000000,
492         (0x0000 << 16) | (0x30f08 >> 2),
493         0x00000000,
494         (0x0001 << 16) | (0x30f08 >> 2),
495         0x00000000,
496         (0x0000 << 16) | (0x30f0c >> 2),
497         0x00000000,
498         (0x0001 << 16) | (0x30f0c >> 2),
499         0x00000000,
500         (0x0600 << 16) | (0x9b7c >> 2),
501         0x00000000,
502         (0x0e00 << 16) | (0x8a14 >> 2),
503         0x00000000,
504         (0x0e00 << 16) | (0x8a18 >> 2),
505         0x00000000,
506         (0x0600 << 16) | (0x30a00 >> 2),
507         0x00000000,
508         (0x0e00 << 16) | (0x8bf0 >> 2),
509         0x00000000,
510         (0x0e00 << 16) | (0x8bcc >> 2),
511         0x00000000,
512         (0x0e00 << 16) | (0x8b24 >> 2),
513         0x00000000,
514         (0x0e00 << 16) | (0x30a04 >> 2),
515         0x00000000,
516         (0x0600 << 16) | (0x30a10 >> 2),
517         0x00000000,
518         (0x0600 << 16) | (0x30a14 >> 2),
519         0x00000000,
520         (0x0600 << 16) | (0x30a18 >> 2),
521         0x00000000,
522         (0x0600 << 16) | (0x30a2c >> 2),
523         0x00000000,
524         (0x0e00 << 16) | (0xc700 >> 2),
525         0x00000000,
526         (0x0e00 << 16) | (0xc704 >> 2),
527         0x00000000,
528         (0x0e00 << 16) | (0xc708 >> 2),
529         0x00000000,
530         (0x0e00 << 16) | (0xc768 >> 2),
531         0x00000000,
532         (0x0400 << 16) | (0xc770 >> 2),
533         0x00000000,
534         (0x0400 << 16) | (0xc774 >> 2),
535         0x00000000,
536         (0x0400 << 16) | (0xc778 >> 2),
537         0x00000000,
538         (0x0400 << 16) | (0xc77c >> 2),
539         0x00000000,
540         (0x0400 << 16) | (0xc780 >> 2),
541         0x00000000,
542         (0x0400 << 16) | (0xc784 >> 2),
543         0x00000000,
544         (0x0400 << 16) | (0xc788 >> 2),
545         0x00000000,
546         (0x0400 << 16) | (0xc78c >> 2),
547         0x00000000,
548         (0x0400 << 16) | (0xc798 >> 2),
549         0x00000000,
550         (0x0400 << 16) | (0xc79c >> 2),
551         0x00000000,
552         (0x0400 << 16) | (0xc7a0 >> 2),
553         0x00000000,
554         (0x0400 << 16) | (0xc7a4 >> 2),
555         0x00000000,
556         (0x0400 << 16) | (0xc7a8 >> 2),
557         0x00000000,
558         (0x0400 << 16) | (0xc7ac >> 2),
559         0x00000000,
560         (0x0400 << 16) | (0xc7b0 >> 2),
561         0x00000000,
562         (0x0400 << 16) | (0xc7b4 >> 2),
563         0x00000000,
564         (0x0e00 << 16) | (0x9100 >> 2),
565         0x00000000,
566         (0x0e00 << 16) | (0x3c010 >> 2),
567         0x00000000,
568         (0x0e00 << 16) | (0x92a8 >> 2),
569         0x00000000,
570         (0x0e00 << 16) | (0x92ac >> 2),
571         0x00000000,
572         (0x0e00 << 16) | (0x92b4 >> 2),
573         0x00000000,
574         (0x0e00 << 16) | (0x92b8 >> 2),
575         0x00000000,
576         (0x0e00 << 16) | (0x92bc >> 2),
577         0x00000000,
578         (0x0e00 << 16) | (0x92c0 >> 2),
579         0x00000000,
580         (0x0e00 << 16) | (0x92c4 >> 2),
581         0x00000000,
582         (0x0e00 << 16) | (0x92c8 >> 2),
583         0x00000000,
584         (0x0e00 << 16) | (0x92cc >> 2),
585         0x00000000,
586         (0x0e00 << 16) | (0x92d0 >> 2),
587         0x00000000,
588         (0x0e00 << 16) | (0x8c00 >> 2),
589         0x00000000,
590         (0x0e00 << 16) | (0x8c04 >> 2),
591         0x00000000,
592         (0x0e00 << 16) | (0x8c20 >> 2),
593         0x00000000,
594         (0x0e00 << 16) | (0x8c38 >> 2),
595         0x00000000,
596         (0x0e00 << 16) | (0x8c3c >> 2),
597         0x00000000,
598         (0x0e00 << 16) | (0xae00 >> 2),
599         0x00000000,
600         (0x0e00 << 16) | (0x9604 >> 2),
601         0x00000000,
602         (0x0e00 << 16) | (0xac08 >> 2),
603         0x00000000,
604         (0x0e00 << 16) | (0xac0c >> 2),
605         0x00000000,
606         (0x0e00 << 16) | (0xac10 >> 2),
607         0x00000000,
608         (0x0e00 << 16) | (0xac14 >> 2),
609         0x00000000,
610         (0x0e00 << 16) | (0xac58 >> 2),
611         0x00000000,
612         (0x0e00 << 16) | (0xac68 >> 2),
613         0x00000000,
614         (0x0e00 << 16) | (0xac6c >> 2),
615         0x00000000,
616         (0x0e00 << 16) | (0xac70 >> 2),
617         0x00000000,
618         (0x0e00 << 16) | (0xac74 >> 2),
619         0x00000000,
620         (0x0e00 << 16) | (0xac78 >> 2),
621         0x00000000,
622         (0x0e00 << 16) | (0xac7c >> 2),
623         0x00000000,
624         (0x0e00 << 16) | (0xac80 >> 2),
625         0x00000000,
626         (0x0e00 << 16) | (0xac84 >> 2),
627         0x00000000,
628         (0x0e00 << 16) | (0xac88 >> 2),
629         0x00000000,
630         (0x0e00 << 16) | (0xac8c >> 2),
631         0x00000000,
632         (0x0e00 << 16) | (0x970c >> 2),
633         0x00000000,
634         (0x0e00 << 16) | (0x9714 >> 2),
635         0x00000000,
636         (0x0e00 << 16) | (0x9718 >> 2),
637         0x00000000,
638         (0x0e00 << 16) | (0x971c >> 2),
639         0x00000000,
640         (0x0e00 << 16) | (0x31068 >> 2),
641         0x00000000,
642         (0x4e00 << 16) | (0x31068 >> 2),
643         0x00000000,
644         (0x5e00 << 16) | (0x31068 >> 2),
645         0x00000000,
646         (0x6e00 << 16) | (0x31068 >> 2),
647         0x00000000,
648         (0x7e00 << 16) | (0x31068 >> 2),
649         0x00000000,
650         (0x8e00 << 16) | (0x31068 >> 2),
651         0x00000000,
652         (0x9e00 << 16) | (0x31068 >> 2),
653         0x00000000,
654         (0xae00 << 16) | (0x31068 >> 2),
655         0x00000000,
656         (0xbe00 << 16) | (0x31068 >> 2),
657         0x00000000,
658         (0x0e00 << 16) | (0xcd10 >> 2),
659         0x00000000,
660         (0x0e00 << 16) | (0xcd14 >> 2),
661         0x00000000,
662         (0x0e00 << 16) | (0x88b0 >> 2),
663         0x00000000,
664         (0x0e00 << 16) | (0x88b4 >> 2),
665         0x00000000,
666         (0x0e00 << 16) | (0x88b8 >> 2),
667         0x00000000,
668         (0x0e00 << 16) | (0x88bc >> 2),
669         0x00000000,
670         (0x0400 << 16) | (0x89c0 >> 2),
671         0x00000000,
672         (0x0e00 << 16) | (0x88c4 >> 2),
673         0x00000000,
674         (0x0e00 << 16) | (0x88c8 >> 2),
675         0x00000000,
676         (0x0e00 << 16) | (0x88d0 >> 2),
677         0x00000000,
678         (0x0e00 << 16) | (0x88d4 >> 2),
679         0x00000000,
680         (0x0e00 << 16) | (0x88d8 >> 2),
681         0x00000000,
682         (0x0e00 << 16) | (0x8980 >> 2),
683         0x00000000,
684         (0x0e00 << 16) | (0x30938 >> 2),
685         0x00000000,
686         (0x0e00 << 16) | (0x3093c >> 2),
687         0x00000000,
688         (0x0e00 << 16) | (0x30940 >> 2),
689         0x00000000,
690         (0x0e00 << 16) | (0x89a0 >> 2),
691         0x00000000,
692         (0x0e00 << 16) | (0x30900 >> 2),
693         0x00000000,
694         (0x0e00 << 16) | (0x30904 >> 2),
695         0x00000000,
696         (0x0e00 << 16) | (0x89b4 >> 2),
697         0x00000000,
698         (0x0e00 << 16) | (0x3c210 >> 2),
699         0x00000000,
700         (0x0e00 << 16) | (0x3c214 >> 2),
701         0x00000000,
702         (0x0e00 << 16) | (0x3c218 >> 2),
703         0x00000000,
704         (0x0e00 << 16) | (0x8904 >> 2),
705         0x00000000,
706         0x5,
707         (0x0e00 << 16) | (0x8c28 >> 2),
708         (0x0e00 << 16) | (0x8c2c >> 2),
709         (0x0e00 << 16) | (0x8c30 >> 2),
710         (0x0e00 << 16) | (0x8c34 >> 2),
711         (0x0e00 << 16) | (0x9600 >> 2),
712 };
713
714 static const u32 kalindi_rlc_save_restore_register_list[] =
715 {
716         (0x0e00 << 16) | (0xc12c >> 2),
717         0x00000000,
718         (0x0e00 << 16) | (0xc140 >> 2),
719         0x00000000,
720         (0x0e00 << 16) | (0xc150 >> 2),
721         0x00000000,
722         (0x0e00 << 16) | (0xc15c >> 2),
723         0x00000000,
724         (0x0e00 << 16) | (0xc168 >> 2),
725         0x00000000,
726         (0x0e00 << 16) | (0xc170 >> 2),
727         0x00000000,
728         (0x0e00 << 16) | (0xc204 >> 2),
729         0x00000000,
730         (0x0e00 << 16) | (0xc2b4 >> 2),
731         0x00000000,
732         (0x0e00 << 16) | (0xc2b8 >> 2),
733         0x00000000,
734         (0x0e00 << 16) | (0xc2bc >> 2),
735         0x00000000,
736         (0x0e00 << 16) | (0xc2c0 >> 2),
737         0x00000000,
738         (0x0e00 << 16) | (0x8228 >> 2),
739         0x00000000,
740         (0x0e00 << 16) | (0x829c >> 2),
741         0x00000000,
742         (0x0e00 << 16) | (0x869c >> 2),
743         0x00000000,
744         (0x0600 << 16) | (0x98f4 >> 2),
745         0x00000000,
746         (0x0e00 << 16) | (0x98f8 >> 2),
747         0x00000000,
748         (0x0e00 << 16) | (0x9900 >> 2),
749         0x00000000,
750         (0x0e00 << 16) | (0xc260 >> 2),
751         0x00000000,
752         (0x0e00 << 16) | (0x90e8 >> 2),
753         0x00000000,
754         (0x0e00 << 16) | (0x3c000 >> 2),
755         0x00000000,
756         (0x0e00 << 16) | (0x3c00c >> 2),
757         0x00000000,
758         (0x0e00 << 16) | (0x8c1c >> 2),
759         0x00000000,
760         (0x0e00 << 16) | (0x9700 >> 2),
761         0x00000000,
762         (0x0e00 << 16) | (0xcd20 >> 2),
763         0x00000000,
764         (0x4e00 << 16) | (0xcd20 >> 2),
765         0x00000000,
766         (0x5e00 << 16) | (0xcd20 >> 2),
767         0x00000000,
768         (0x6e00 << 16) | (0xcd20 >> 2),
769         0x00000000,
770         (0x7e00 << 16) | (0xcd20 >> 2),
771         0x00000000,
772         (0x0e00 << 16) | (0x89bc >> 2),
773         0x00000000,
774         (0x0e00 << 16) | (0x8900 >> 2),
775         0x00000000,
776         0x3,
777         (0x0e00 << 16) | (0xc130 >> 2),
778         0x00000000,
779         (0x0e00 << 16) | (0xc134 >> 2),
780         0x00000000,
781         (0x0e00 << 16) | (0xc1fc >> 2),
782         0x00000000,
783         (0x0e00 << 16) | (0xc208 >> 2),
784         0x00000000,
785         (0x0e00 << 16) | (0xc264 >> 2),
786         0x00000000,
787         (0x0e00 << 16) | (0xc268 >> 2),
788         0x00000000,
789         (0x0e00 << 16) | (0xc26c >> 2),
790         0x00000000,
791         (0x0e00 << 16) | (0xc270 >> 2),
792         0x00000000,
793         (0x0e00 << 16) | (0xc274 >> 2),
794         0x00000000,
795         (0x0e00 << 16) | (0xc28c >> 2),
796         0x00000000,
797         (0x0e00 << 16) | (0xc290 >> 2),
798         0x00000000,
799         (0x0e00 << 16) | (0xc294 >> 2),
800         0x00000000,
801         (0x0e00 << 16) | (0xc298 >> 2),
802         0x00000000,
803         (0x0e00 << 16) | (0xc2a0 >> 2),
804         0x00000000,
805         (0x0e00 << 16) | (0xc2a4 >> 2),
806         0x00000000,
807         (0x0e00 << 16) | (0xc2a8 >> 2),
808         0x00000000,
809         (0x0e00 << 16) | (0xc2ac >> 2),
810         0x00000000,
811         (0x0e00 << 16) | (0x301d0 >> 2),
812         0x00000000,
813         (0x0e00 << 16) | (0x30238 >> 2),
814         0x00000000,
815         (0x0e00 << 16) | (0x30250 >> 2),
816         0x00000000,
817         (0x0e00 << 16) | (0x30254 >> 2),
818         0x00000000,
819         (0x0e00 << 16) | (0x30258 >> 2),
820         0x00000000,
821         (0x0e00 << 16) | (0x3025c >> 2),
822         0x00000000,
823         (0x4e00 << 16) | (0xc900 >> 2),
824         0x00000000,
825         (0x5e00 << 16) | (0xc900 >> 2),
826         0x00000000,
827         (0x6e00 << 16) | (0xc900 >> 2),
828         0x00000000,
829         (0x7e00 << 16) | (0xc900 >> 2),
830         0x00000000,
831         (0x4e00 << 16) | (0xc904 >> 2),
832         0x00000000,
833         (0x5e00 << 16) | (0xc904 >> 2),
834         0x00000000,
835         (0x6e00 << 16) | (0xc904 >> 2),
836         0x00000000,
837         (0x7e00 << 16) | (0xc904 >> 2),
838         0x00000000,
839         (0x4e00 << 16) | (0xc908 >> 2),
840         0x00000000,
841         (0x5e00 << 16) | (0xc908 >> 2),
842         0x00000000,
843         (0x6e00 << 16) | (0xc908 >> 2),
844         0x00000000,
845         (0x7e00 << 16) | (0xc908 >> 2),
846         0x00000000,
847         (0x4e00 << 16) | (0xc90c >> 2),
848         0x00000000,
849         (0x5e00 << 16) | (0xc90c >> 2),
850         0x00000000,
851         (0x6e00 << 16) | (0xc90c >> 2),
852         0x00000000,
853         (0x7e00 << 16) | (0xc90c >> 2),
854         0x00000000,
855         (0x4e00 << 16) | (0xc910 >> 2),
856         0x00000000,
857         (0x5e00 << 16) | (0xc910 >> 2),
858         0x00000000,
859         (0x6e00 << 16) | (0xc910 >> 2),
860         0x00000000,
861         (0x7e00 << 16) | (0xc910 >> 2),
862         0x00000000,
863         (0x0e00 << 16) | (0xc99c >> 2),
864         0x00000000,
865         (0x0e00 << 16) | (0x9834 >> 2),
866         0x00000000,
867         (0x0000 << 16) | (0x30f00 >> 2),
868         0x00000000,
869         (0x0000 << 16) | (0x30f04 >> 2),
870         0x00000000,
871         (0x0000 << 16) | (0x30f08 >> 2),
872         0x00000000,
873         (0x0000 << 16) | (0x30f0c >> 2),
874         0x00000000,
875         (0x0600 << 16) | (0x9b7c >> 2),
876         0x00000000,
877         (0x0e00 << 16) | (0x8a14 >> 2),
878         0x00000000,
879         (0x0e00 << 16) | (0x8a18 >> 2),
880         0x00000000,
881         (0x0600 << 16) | (0x30a00 >> 2),
882         0x00000000,
883         (0x0e00 << 16) | (0x8bf0 >> 2),
884         0x00000000,
885         (0x0e00 << 16) | (0x8bcc >> 2),
886         0x00000000,
887         (0x0e00 << 16) | (0x8b24 >> 2),
888         0x00000000,
889         (0x0e00 << 16) | (0x30a04 >> 2),
890         0x00000000,
891         (0x0600 << 16) | (0x30a10 >> 2),
892         0x00000000,
893         (0x0600 << 16) | (0x30a14 >> 2),
894         0x00000000,
895         (0x0600 << 16) | (0x30a18 >> 2),
896         0x00000000,
897         (0x0600 << 16) | (0x30a2c >> 2),
898         0x00000000,
899         (0x0e00 << 16) | (0xc700 >> 2),
900         0x00000000,
901         (0x0e00 << 16) | (0xc704 >> 2),
902         0x00000000,
903         (0x0e00 << 16) | (0xc708 >> 2),
904         0x00000000,
905         (0x0e00 << 16) | (0xc768 >> 2),
906         0x00000000,
907         (0x0400 << 16) | (0xc770 >> 2),
908         0x00000000,
909         (0x0400 << 16) | (0xc774 >> 2),
910         0x00000000,
911         (0x0400 << 16) | (0xc798 >> 2),
912         0x00000000,
913         (0x0400 << 16) | (0xc79c >> 2),
914         0x00000000,
915         (0x0e00 << 16) | (0x9100 >> 2),
916         0x00000000,
917         (0x0e00 << 16) | (0x3c010 >> 2),
918         0x00000000,
919         (0x0e00 << 16) | (0x8c00 >> 2),
920         0x00000000,
921         (0x0e00 << 16) | (0x8c04 >> 2),
922         0x00000000,
923         (0x0e00 << 16) | (0x8c20 >> 2),
924         0x00000000,
925         (0x0e00 << 16) | (0x8c38 >> 2),
926         0x00000000,
927         (0x0e00 << 16) | (0x8c3c >> 2),
928         0x00000000,
929         (0x0e00 << 16) | (0xae00 >> 2),
930         0x00000000,
931         (0x0e00 << 16) | (0x9604 >> 2),
932         0x00000000,
933         (0x0e00 << 16) | (0xac08 >> 2),
934         0x00000000,
935         (0x0e00 << 16) | (0xac0c >> 2),
936         0x00000000,
937         (0x0e00 << 16) | (0xac10 >> 2),
938         0x00000000,
939         (0x0e00 << 16) | (0xac14 >> 2),
940         0x00000000,
941         (0x0e00 << 16) | (0xac58 >> 2),
942         0x00000000,
943         (0x0e00 << 16) | (0xac68 >> 2),
944         0x00000000,
945         (0x0e00 << 16) | (0xac6c >> 2),
946         0x00000000,
947         (0x0e00 << 16) | (0xac70 >> 2),
948         0x00000000,
949         (0x0e00 << 16) | (0xac74 >> 2),
950         0x00000000,
951         (0x0e00 << 16) | (0xac78 >> 2),
952         0x00000000,
953         (0x0e00 << 16) | (0xac7c >> 2),
954         0x00000000,
955         (0x0e00 << 16) | (0xac80 >> 2),
956         0x00000000,
957         (0x0e00 << 16) | (0xac84 >> 2),
958         0x00000000,
959         (0x0e00 << 16) | (0xac88 >> 2),
960         0x00000000,
961         (0x0e00 << 16) | (0xac8c >> 2),
962         0x00000000,
963         (0x0e00 << 16) | (0x970c >> 2),
964         0x00000000,
965         (0x0e00 << 16) | (0x9714 >> 2),
966         0x00000000,
967         (0x0e00 << 16) | (0x9718 >> 2),
968         0x00000000,
969         (0x0e00 << 16) | (0x971c >> 2),
970         0x00000000,
971         (0x0e00 << 16) | (0x31068 >> 2),
972         0x00000000,
973         (0x4e00 << 16) | (0x31068 >> 2),
974         0x00000000,
975         (0x5e00 << 16) | (0x31068 >> 2),
976         0x00000000,
977         (0x6e00 << 16) | (0x31068 >> 2),
978         0x00000000,
979         (0x7e00 << 16) | (0x31068 >> 2),
980         0x00000000,
981         (0x0e00 << 16) | (0xcd10 >> 2),
982         0x00000000,
983         (0x0e00 << 16) | (0xcd14 >> 2),
984         0x00000000,
985         (0x0e00 << 16) | (0x88b0 >> 2),
986         0x00000000,
987         (0x0e00 << 16) | (0x88b4 >> 2),
988         0x00000000,
989         (0x0e00 << 16) | (0x88b8 >> 2),
990         0x00000000,
991         (0x0e00 << 16) | (0x88bc >> 2),
992         0x00000000,
993         (0x0400 << 16) | (0x89c0 >> 2),
994         0x00000000,
995         (0x0e00 << 16) | (0x88c4 >> 2),
996         0x00000000,
997         (0x0e00 << 16) | (0x88c8 >> 2),
998         0x00000000,
999         (0x0e00 << 16) | (0x88d0 >> 2),
1000         0x00000000,
1001         (0x0e00 << 16) | (0x88d4 >> 2),
1002         0x00000000,
1003         (0x0e00 << 16) | (0x88d8 >> 2),
1004         0x00000000,
1005         (0x0e00 << 16) | (0x8980 >> 2),
1006         0x00000000,
1007         (0x0e00 << 16) | (0x30938 >> 2),
1008         0x00000000,
1009         (0x0e00 << 16) | (0x3093c >> 2),
1010         0x00000000,
1011         (0x0e00 << 16) | (0x30940 >> 2),
1012         0x00000000,
1013         (0x0e00 << 16) | (0x89a0 >> 2),
1014         0x00000000,
1015         (0x0e00 << 16) | (0x30900 >> 2),
1016         0x00000000,
1017         (0x0e00 << 16) | (0x30904 >> 2),
1018         0x00000000,
1019         (0x0e00 << 16) | (0x89b4 >> 2),
1020         0x00000000,
1021         (0x0e00 << 16) | (0x3e1fc >> 2),
1022         0x00000000,
1023         (0x0e00 << 16) | (0x3c210 >> 2),
1024         0x00000000,
1025         (0x0e00 << 16) | (0x3c214 >> 2),
1026         0x00000000,
1027         (0x0e00 << 16) | (0x3c218 >> 2),
1028         0x00000000,
1029         (0x0e00 << 16) | (0x8904 >> 2),
1030         0x00000000,
1031         0x5,
1032         (0x0e00 << 16) | (0x8c28 >> 2),
1033         (0x0e00 << 16) | (0x8c2c >> 2),
1034         (0x0e00 << 16) | (0x8c30 >> 2),
1035         (0x0e00 << 16) | (0x8c34 >> 2),
1036         (0x0e00 << 16) | (0x9600 >> 2),
1037 };
1038
1039 static const u32 bonaire_golden_spm_registers[] =
1040 {
1041         0x30800, 0xe0ffffff, 0xe0000000
1042 };
1043
1044 static const u32 bonaire_golden_common_registers[] =
1045 {
1046         0xc770, 0xffffffff, 0x00000800,
1047         0xc774, 0xffffffff, 0x00000800,
1048         0xc798, 0xffffffff, 0x00007fbf,
1049         0xc79c, 0xffffffff, 0x00007faf
1050 };
1051
1052 static const u32 bonaire_golden_registers[] =
1053 {
1054         0x3354, 0x00000333, 0x00000333,
1055         0x3350, 0x000c0fc0, 0x00040200,
1056         0x9a10, 0x00010000, 0x00058208,
1057         0x3c000, 0xffff1fff, 0x00140000,
1058         0x3c200, 0xfdfc0fff, 0x00000100,
1059         0x3c234, 0x40000000, 0x40000200,
1060         0x9830, 0xffffffff, 0x00000000,
1061         0x9834, 0xf00fffff, 0x00000400,
1062         0x9838, 0x0002021c, 0x00020200,
1063         0xc78, 0x00000080, 0x00000000,
1064         0x5bb0, 0x000000f0, 0x00000070,
1065         0x5bc0, 0xf0311fff, 0x80300000,
1066         0x98f8, 0x73773777, 0x12010001,
1067         0x350c, 0x00810000, 0x408af000,
1068         0x7030, 0x31000111, 0x00000011,
1069         0x2f48, 0x73773777, 0x12010001,
1070         0x220c, 0x00007fb6, 0x0021a1b1,
1071         0x2210, 0x00007fb6, 0x002021b1,
1072         0x2180, 0x00007fb6, 0x00002191,
1073         0x2218, 0x00007fb6, 0x002121b1,
1074         0x221c, 0x00007fb6, 0x002021b1,
1075         0x21dc, 0x00007fb6, 0x00002191,
1076         0x21e0, 0x00007fb6, 0x00002191,
1077         0x3628, 0x0000003f, 0x0000000a,
1078         0x362c, 0x0000003f, 0x0000000a,
1079         0x2ae4, 0x00073ffe, 0x000022a2,
1080         0x240c, 0x000007ff, 0x00000000,
1081         0x8a14, 0xf000003f, 0x00000007,
1082         0x8bf0, 0x00002001, 0x00000001,
1083         0x8b24, 0xffffffff, 0x00ffffff,
1084         0x30a04, 0x0000ff0f, 0x00000000,
1085         0x28a4c, 0x07ffffff, 0x06000000,
1086         0x4d8, 0x00000fff, 0x00000100,
1087         0x3e78, 0x00000001, 0x00000002,
1088         0x9100, 0x03000000, 0x0362c688,
1089         0x8c00, 0x000000ff, 0x00000001,
1090         0xe40, 0x00001fff, 0x00001fff,
1091         0x9060, 0x0000007f, 0x00000020,
1092         0x9508, 0x00010000, 0x00010000,
1093         0xac14, 0x000003ff, 0x000000f3,
1094         0xac0c, 0xffffffff, 0x00001032
1095 };
1096
1097 static const u32 bonaire_mgcg_cgcg_init[] =
1098 {
1099         0xc420, 0xffffffff, 0xfffffffc,
1100         0x30800, 0xffffffff, 0xe0000000,
1101         0x3c2a0, 0xffffffff, 0x00000100,
1102         0x3c208, 0xffffffff, 0x00000100,
1103         0x3c2c0, 0xffffffff, 0xc0000100,
1104         0x3c2c8, 0xffffffff, 0xc0000100,
1105         0x3c2c4, 0xffffffff, 0xc0000100,
1106         0x55e4, 0xffffffff, 0x00600100,
1107         0x3c280, 0xffffffff, 0x00000100,
1108         0x3c214, 0xffffffff, 0x06000100,
1109         0x3c220, 0xffffffff, 0x00000100,
1110         0x3c218, 0xffffffff, 0x06000100,
1111         0x3c204, 0xffffffff, 0x00000100,
1112         0x3c2e0, 0xffffffff, 0x00000100,
1113         0x3c224, 0xffffffff, 0x00000100,
1114         0x3c200, 0xffffffff, 0x00000100,
1115         0x3c230, 0xffffffff, 0x00000100,
1116         0x3c234, 0xffffffff, 0x00000100,
1117         0x3c250, 0xffffffff, 0x00000100,
1118         0x3c254, 0xffffffff, 0x00000100,
1119         0x3c258, 0xffffffff, 0x00000100,
1120         0x3c25c, 0xffffffff, 0x00000100,
1121         0x3c260, 0xffffffff, 0x00000100,
1122         0x3c27c, 0xffffffff, 0x00000100,
1123         0x3c278, 0xffffffff, 0x00000100,
1124         0x3c210, 0xffffffff, 0x06000100,
1125         0x3c290, 0xffffffff, 0x00000100,
1126         0x3c274, 0xffffffff, 0x00000100,
1127         0x3c2b4, 0xffffffff, 0x00000100,
1128         0x3c2b0, 0xffffffff, 0x00000100,
1129         0x3c270, 0xffffffff, 0x00000100,
1130         0x30800, 0xffffffff, 0xe0000000,
1131         0x3c020, 0xffffffff, 0x00010000,
1132         0x3c024, 0xffffffff, 0x00030002,
1133         0x3c028, 0xffffffff, 0x00040007,
1134         0x3c02c, 0xffffffff, 0x00060005,
1135         0x3c030, 0xffffffff, 0x00090008,
1136         0x3c034, 0xffffffff, 0x00010000,
1137         0x3c038, 0xffffffff, 0x00030002,
1138         0x3c03c, 0xffffffff, 0x00040007,
1139         0x3c040, 0xffffffff, 0x00060005,
1140         0x3c044, 0xffffffff, 0x00090008,
1141         0x3c048, 0xffffffff, 0x00010000,
1142         0x3c04c, 0xffffffff, 0x00030002,
1143         0x3c050, 0xffffffff, 0x00040007,
1144         0x3c054, 0xffffffff, 0x00060005,
1145         0x3c058, 0xffffffff, 0x00090008,
1146         0x3c05c, 0xffffffff, 0x00010000,
1147         0x3c060, 0xffffffff, 0x00030002,
1148         0x3c064, 0xffffffff, 0x00040007,
1149         0x3c068, 0xffffffff, 0x00060005,
1150         0x3c06c, 0xffffffff, 0x00090008,
1151         0x3c070, 0xffffffff, 0x00010000,
1152         0x3c074, 0xffffffff, 0x00030002,
1153         0x3c078, 0xffffffff, 0x00040007,
1154         0x3c07c, 0xffffffff, 0x00060005,
1155         0x3c080, 0xffffffff, 0x00090008,
1156         0x3c084, 0xffffffff, 0x00010000,
1157         0x3c088, 0xffffffff, 0x00030002,
1158         0x3c08c, 0xffffffff, 0x00040007,
1159         0x3c090, 0xffffffff, 0x00060005,
1160         0x3c094, 0xffffffff, 0x00090008,
1161         0x3c098, 0xffffffff, 0x00010000,
1162         0x3c09c, 0xffffffff, 0x00030002,
1163         0x3c0a0, 0xffffffff, 0x00040007,
1164         0x3c0a4, 0xffffffff, 0x00060005,
1165         0x3c0a8, 0xffffffff, 0x00090008,
1166         0x3c000, 0xffffffff, 0x96e00200,
1167         0x8708, 0xffffffff, 0x00900100,
1168         0xc424, 0xffffffff, 0x0020003f,
1169         0x38, 0xffffffff, 0x0140001c,
1170         0x3c, 0x000f0000, 0x000f0000,
1171         0x220, 0xffffffff, 0xC060000C,
1172         0x224, 0xc0000fff, 0x00000100,
1173         0xf90, 0xffffffff, 0x00000100,
1174         0xf98, 0x00000101, 0x00000000,
1175         0x20a8, 0xffffffff, 0x00000104,
1176         0x55e4, 0xff000fff, 0x00000100,
1177         0x30cc, 0xc0000fff, 0x00000104,
1178         0xc1e4, 0x00000001, 0x00000001,
1179         0xd00c, 0xff000ff0, 0x00000100,
1180         0xd80c, 0xff000ff0, 0x00000100
1181 };
1182
1183 static const u32 spectre_golden_spm_registers[] =
1184 {
1185         0x30800, 0xe0ffffff, 0xe0000000
1186 };
1187
1188 static const u32 spectre_golden_common_registers[] =
1189 {
1190         0xc770, 0xffffffff, 0x00000800,
1191         0xc774, 0xffffffff, 0x00000800,
1192         0xc798, 0xffffffff, 0x00007fbf,
1193         0xc79c, 0xffffffff, 0x00007faf
1194 };
1195
1196 static const u32 spectre_golden_registers[] =
1197 {
1198         0x3c000, 0xffff1fff, 0x96940200,
1199         0x3c00c, 0xffff0001, 0xff000000,
1200         0x3c200, 0xfffc0fff, 0x00000100,
1201         0x6ed8, 0x00010101, 0x00010000,
1202         0x9834, 0xf00fffff, 0x00000400,
1203         0x9838, 0xfffffffc, 0x00020200,
1204         0x5bb0, 0x000000f0, 0x00000070,
1205         0x5bc0, 0xf0311fff, 0x80300000,
1206         0x98f8, 0x73773777, 0x12010001,
1207         0x9b7c, 0x00ff0000, 0x00fc0000,
1208         0x2f48, 0x73773777, 0x12010001,
1209         0x8a14, 0xf000003f, 0x00000007,
1210         0x8b24, 0xffffffff, 0x00ffffff,
1211         0x28350, 0x3f3f3fff, 0x00000082,
1212         0x28354, 0x0000003f, 0x00000000,
1213         0x3e78, 0x00000001, 0x00000002,
1214         0x913c, 0xffff03df, 0x00000004,
1215         0xc768, 0x00000008, 0x00000008,
1216         0x8c00, 0x000008ff, 0x00000800,
1217         0x9508, 0x00010000, 0x00010000,
1218         0xac0c, 0xffffffff, 0x54763210,
1219         0x214f8, 0x01ff01ff, 0x00000002,
1220         0x21498, 0x007ff800, 0x00200000,
1221         0x2015c, 0xffffffff, 0x00000f40,
1222         0x30934, 0xffffffff, 0x00000001
1223 };
1224
1225 static const u32 spectre_mgcg_cgcg_init[] =
1226 {
1227         0xc420, 0xffffffff, 0xfffffffc,
1228         0x30800, 0xffffffff, 0xe0000000,
1229         0x3c2a0, 0xffffffff, 0x00000100,
1230         0x3c208, 0xffffffff, 0x00000100,
1231         0x3c2c0, 0xffffffff, 0x00000100,
1232         0x3c2c8, 0xffffffff, 0x00000100,
1233         0x3c2c4, 0xffffffff, 0x00000100,
1234         0x55e4, 0xffffffff, 0x00600100,
1235         0x3c280, 0xffffffff, 0x00000100,
1236         0x3c214, 0xffffffff, 0x06000100,
1237         0x3c220, 0xffffffff, 0x00000100,
1238         0x3c218, 0xffffffff, 0x06000100,
1239         0x3c204, 0xffffffff, 0x00000100,
1240         0x3c2e0, 0xffffffff, 0x00000100,
1241         0x3c224, 0xffffffff, 0x00000100,
1242         0x3c200, 0xffffffff, 0x00000100,
1243         0x3c230, 0xffffffff, 0x00000100,
1244         0x3c234, 0xffffffff, 0x00000100,
1245         0x3c250, 0xffffffff, 0x00000100,
1246         0x3c254, 0xffffffff, 0x00000100,
1247         0x3c258, 0xffffffff, 0x00000100,
1248         0x3c25c, 0xffffffff, 0x00000100,
1249         0x3c260, 0xffffffff, 0x00000100,
1250         0x3c27c, 0xffffffff, 0x00000100,
1251         0x3c278, 0xffffffff, 0x00000100,
1252         0x3c210, 0xffffffff, 0x06000100,
1253         0x3c290, 0xffffffff, 0x00000100,
1254         0x3c274, 0xffffffff, 0x00000100,
1255         0x3c2b4, 0xffffffff, 0x00000100,
1256         0x3c2b0, 0xffffffff, 0x00000100,
1257         0x3c270, 0xffffffff, 0x00000100,
1258         0x30800, 0xffffffff, 0xe0000000,
1259         0x3c020, 0xffffffff, 0x00010000,
1260         0x3c024, 0xffffffff, 0x00030002,
1261         0x3c028, 0xffffffff, 0x00040007,
1262         0x3c02c, 0xffffffff, 0x00060005,
1263         0x3c030, 0xffffffff, 0x00090008,
1264         0x3c034, 0xffffffff, 0x00010000,
1265         0x3c038, 0xffffffff, 0x00030002,
1266         0x3c03c, 0xffffffff, 0x00040007,
1267         0x3c040, 0xffffffff, 0x00060005,
1268         0x3c044, 0xffffffff, 0x00090008,
1269         0x3c048, 0xffffffff, 0x00010000,
1270         0x3c04c, 0xffffffff, 0x00030002,
1271         0x3c050, 0xffffffff, 0x00040007,
1272         0x3c054, 0xffffffff, 0x00060005,
1273         0x3c058, 0xffffffff, 0x00090008,
1274         0x3c05c, 0xffffffff, 0x00010000,
1275         0x3c060, 0xffffffff, 0x00030002,
1276         0x3c064, 0xffffffff, 0x00040007,
1277         0x3c068, 0xffffffff, 0x00060005,
1278         0x3c06c, 0xffffffff, 0x00090008,
1279         0x3c070, 0xffffffff, 0x00010000,
1280         0x3c074, 0xffffffff, 0x00030002,
1281         0x3c078, 0xffffffff, 0x00040007,
1282         0x3c07c, 0xffffffff, 0x00060005,
1283         0x3c080, 0xffffffff, 0x00090008,
1284         0x3c084, 0xffffffff, 0x00010000,
1285         0x3c088, 0xffffffff, 0x00030002,
1286         0x3c08c, 0xffffffff, 0x00040007,
1287         0x3c090, 0xffffffff, 0x00060005,
1288         0x3c094, 0xffffffff, 0x00090008,
1289         0x3c098, 0xffffffff, 0x00010000,
1290         0x3c09c, 0xffffffff, 0x00030002,
1291         0x3c0a0, 0xffffffff, 0x00040007,
1292         0x3c0a4, 0xffffffff, 0x00060005,
1293         0x3c0a8, 0xffffffff, 0x00090008,
1294         0x3c0ac, 0xffffffff, 0x00010000,
1295         0x3c0b0, 0xffffffff, 0x00030002,
1296         0x3c0b4, 0xffffffff, 0x00040007,
1297         0x3c0b8, 0xffffffff, 0x00060005,
1298         0x3c0bc, 0xffffffff, 0x00090008,
1299         0x3c000, 0xffffffff, 0x96e00200,
1300         0x8708, 0xffffffff, 0x00900100,
1301         0xc424, 0xffffffff, 0x0020003f,
1302         0x38, 0xffffffff, 0x0140001c,
1303         0x3c, 0x000f0000, 0x000f0000,
1304         0x220, 0xffffffff, 0xC060000C,
1305         0x224, 0xc0000fff, 0x00000100,
1306         0xf90, 0xffffffff, 0x00000100,
1307         0xf98, 0x00000101, 0x00000000,
1308         0x20a8, 0xffffffff, 0x00000104,
1309         0x55e4, 0xff000fff, 0x00000100,
1310         0x30cc, 0xc0000fff, 0x00000104,
1311         0xc1e4, 0x00000001, 0x00000001,
1312         0xd00c, 0xff000ff0, 0x00000100,
1313         0xd80c, 0xff000ff0, 0x00000100
1314 };
1315
1316 static const u32 kalindi_golden_spm_registers[] =
1317 {
1318         0x30800, 0xe0ffffff, 0xe0000000
1319 };
1320
1321 static const u32 kalindi_golden_common_registers[] =
1322 {
1323         0xc770, 0xffffffff, 0x00000800,
1324         0xc774, 0xffffffff, 0x00000800,
1325         0xc798, 0xffffffff, 0x00007fbf,
1326         0xc79c, 0xffffffff, 0x00007faf
1327 };
1328
1329 static const u32 kalindi_golden_registers[] =
1330 {
1331         0x3c000, 0xffffdfff, 0x6e944040,
1332         0x55e4, 0xff607fff, 0xfc000100,
1333         0x3c220, 0xff000fff, 0x00000100,
1334         0x3c224, 0xff000fff, 0x00000100,
1335         0x3c200, 0xfffc0fff, 0x00000100,
1336         0x6ed8, 0x00010101, 0x00010000,
1337         0x9830, 0xffffffff, 0x00000000,
1338         0x9834, 0xf00fffff, 0x00000400,
1339         0x5bb0, 0x000000f0, 0x00000070,
1340         0x5bc0, 0xf0311fff, 0x80300000,
1341         0x98f8, 0x73773777, 0x12010001,
1342         0x98fc, 0xffffffff, 0x00000010,
1343         0x9b7c, 0x00ff0000, 0x00fc0000,
1344         0x8030, 0x00001f0f, 0x0000100a,
1345         0x2f48, 0x73773777, 0x12010001,
1346         0x2408, 0x000fffff, 0x000c007f,
1347         0x8a14, 0xf000003f, 0x00000007,
1348         0x8b24, 0x3fff3fff, 0x00ffcfff,
1349         0x30a04, 0x0000ff0f, 0x00000000,
1350         0x28a4c, 0x07ffffff, 0x06000000,
1351         0x4d8, 0x00000fff, 0x00000100,
1352         0x3e78, 0x00000001, 0x00000002,
1353         0xc768, 0x00000008, 0x00000008,
1354         0x8c00, 0x000000ff, 0x00000003,
1355         0x214f8, 0x01ff01ff, 0x00000002,
1356         0x21498, 0x007ff800, 0x00200000,
1357         0x2015c, 0xffffffff, 0x00000f40,
1358         0x88c4, 0x001f3ae3, 0x00000082,
1359         0x88d4, 0x0000001f, 0x00000010,
1360         0x30934, 0xffffffff, 0x00000000
1361 };
1362
1363 static const u32 kalindi_mgcg_cgcg_init[] =
1364 {
1365         0xc420, 0xffffffff, 0xfffffffc,
1366         0x30800, 0xffffffff, 0xe0000000,
1367         0x3c2a0, 0xffffffff, 0x00000100,
1368         0x3c208, 0xffffffff, 0x00000100,
1369         0x3c2c0, 0xffffffff, 0x00000100,
1370         0x3c2c8, 0xffffffff, 0x00000100,
1371         0x3c2c4, 0xffffffff, 0x00000100,
1372         0x55e4, 0xffffffff, 0x00600100,
1373         0x3c280, 0xffffffff, 0x00000100,
1374         0x3c214, 0xffffffff, 0x06000100,
1375         0x3c220, 0xffffffff, 0x00000100,
1376         0x3c218, 0xffffffff, 0x06000100,
1377         0x3c204, 0xffffffff, 0x00000100,
1378         0x3c2e0, 0xffffffff, 0x00000100,
1379         0x3c224, 0xffffffff, 0x00000100,
1380         0x3c200, 0xffffffff, 0x00000100,
1381         0x3c230, 0xffffffff, 0x00000100,
1382         0x3c234, 0xffffffff, 0x00000100,
1383         0x3c250, 0xffffffff, 0x00000100,
1384         0x3c254, 0xffffffff, 0x00000100,
1385         0x3c258, 0xffffffff, 0x00000100,
1386         0x3c25c, 0xffffffff, 0x00000100,
1387         0x3c260, 0xffffffff, 0x00000100,
1388         0x3c27c, 0xffffffff, 0x00000100,
1389         0x3c278, 0xffffffff, 0x00000100,
1390         0x3c210, 0xffffffff, 0x06000100,
1391         0x3c290, 0xffffffff, 0x00000100,
1392         0x3c274, 0xffffffff, 0x00000100,
1393         0x3c2b4, 0xffffffff, 0x00000100,
1394         0x3c2b0, 0xffffffff, 0x00000100,
1395         0x3c270, 0xffffffff, 0x00000100,
1396         0x30800, 0xffffffff, 0xe0000000,
1397         0x3c020, 0xffffffff, 0x00010000,
1398         0x3c024, 0xffffffff, 0x00030002,
1399         0x3c028, 0xffffffff, 0x00040007,
1400         0x3c02c, 0xffffffff, 0x00060005,
1401         0x3c030, 0xffffffff, 0x00090008,
1402         0x3c034, 0xffffffff, 0x00010000,
1403         0x3c038, 0xffffffff, 0x00030002,
1404         0x3c03c, 0xffffffff, 0x00040007,
1405         0x3c040, 0xffffffff, 0x00060005,
1406         0x3c044, 0xffffffff, 0x00090008,
1407         0x3c000, 0xffffffff, 0x96e00200,
1408         0x8708, 0xffffffff, 0x00900100,
1409         0xc424, 0xffffffff, 0x0020003f,
1410         0x38, 0xffffffff, 0x0140001c,
1411         0x3c, 0x000f0000, 0x000f0000,
1412         0x220, 0xffffffff, 0xC060000C,
1413         0x224, 0xc0000fff, 0x00000100,
1414         0x20a8, 0xffffffff, 0x00000104,
1415         0x55e4, 0xff000fff, 0x00000100,
1416         0x30cc, 0xc0000fff, 0x00000104,
1417         0xc1e4, 0x00000001, 0x00000001,
1418         0xd00c, 0xff000ff0, 0x00000100,
1419         0xd80c, 0xff000ff0, 0x00000100
1420 };
1421
1422 static const u32 hawaii_golden_spm_registers[] =
1423 {
1424         0x30800, 0xe0ffffff, 0xe0000000
1425 };
1426
1427 static const u32 hawaii_golden_common_registers[] =
1428 {
1429         0x30800, 0xffffffff, 0xe0000000,
1430         0x28350, 0xffffffff, 0x3a00161a,
1431         0x28354, 0xffffffff, 0x0000002e,
1432         0x9a10, 0xffffffff, 0x00018208,
1433         0x98f8, 0xffffffff, 0x12011003
1434 };
1435
1436 static const u32 hawaii_golden_registers[] =
1437 {
1438         0x3354, 0x00000333, 0x00000333,
1439         0x9a10, 0x00010000, 0x00058208,
1440         0x9830, 0xffffffff, 0x00000000,
1441         0x9834, 0xf00fffff, 0x00000400,
1442         0x9838, 0x0002021c, 0x00020200,
1443         0xc78, 0x00000080, 0x00000000,
1444         0x5bb0, 0x000000f0, 0x00000070,
1445         0x5bc0, 0xf0311fff, 0x80300000,
1446         0x350c, 0x00810000, 0x408af000,
1447         0x7030, 0x31000111, 0x00000011,
1448         0x2f48, 0x73773777, 0x12010001,
1449         0x2120, 0x0000007f, 0x0000001b,
1450         0x21dc, 0x00007fb6, 0x00002191,
1451         0x3628, 0x0000003f, 0x0000000a,
1452         0x362c, 0x0000003f, 0x0000000a,
1453         0x2ae4, 0x00073ffe, 0x000022a2,
1454         0x240c, 0x000007ff, 0x00000000,
1455         0x8bf0, 0x00002001, 0x00000001,
1456         0x8b24, 0xffffffff, 0x00ffffff,
1457         0x30a04, 0x0000ff0f, 0x00000000,
1458         0x28a4c, 0x07ffffff, 0x06000000,
1459         0x3e78, 0x00000001, 0x00000002,
1460         0xc768, 0x00000008, 0x00000008,
1461         0xc770, 0x00000f00, 0x00000800,
1462         0xc774, 0x00000f00, 0x00000800,
1463         0xc798, 0x00ffffff, 0x00ff7fbf,
1464         0xc79c, 0x00ffffff, 0x00ff7faf,
1465         0x8c00, 0x000000ff, 0x00000800,
1466         0xe40, 0x00001fff, 0x00001fff,
1467         0x9060, 0x0000007f, 0x00000020,
1468         0x9508, 0x00010000, 0x00010000,
1469         0xae00, 0x00100000, 0x000ff07c,
1470         0xac14, 0x000003ff, 0x0000000f,
1471         0xac10, 0xffffffff, 0x7564fdec,
1472         0xac0c, 0xffffffff, 0x3120b9a8,
1473         0xac08, 0x20000000, 0x0f9c0000
1474 };
1475
1476 static const u32 hawaii_mgcg_cgcg_init[] =
1477 {
1478         0xc420, 0xffffffff, 0xfffffffd,
1479         0x30800, 0xffffffff, 0xe0000000,
1480         0x3c2a0, 0xffffffff, 0x00000100,
1481         0x3c208, 0xffffffff, 0x00000100,
1482         0x3c2c0, 0xffffffff, 0x00000100,
1483         0x3c2c8, 0xffffffff, 0x00000100,
1484         0x3c2c4, 0xffffffff, 0x00000100,
1485         0x55e4, 0xffffffff, 0x00200100,
1486         0x3c280, 0xffffffff, 0x00000100,
1487         0x3c214, 0xffffffff, 0x06000100,
1488         0x3c220, 0xffffffff, 0x00000100,
1489         0x3c218, 0xffffffff, 0x06000100,
1490         0x3c204, 0xffffffff, 0x00000100,
1491         0x3c2e0, 0xffffffff, 0x00000100,
1492         0x3c224, 0xffffffff, 0x00000100,
1493         0x3c200, 0xffffffff, 0x00000100,
1494         0x3c230, 0xffffffff, 0x00000100,
1495         0x3c234, 0xffffffff, 0x00000100,
1496         0x3c250, 0xffffffff, 0x00000100,
1497         0x3c254, 0xffffffff, 0x00000100,
1498         0x3c258, 0xffffffff, 0x00000100,
1499         0x3c25c, 0xffffffff, 0x00000100,
1500         0x3c260, 0xffffffff, 0x00000100,
1501         0x3c27c, 0xffffffff, 0x00000100,
1502         0x3c278, 0xffffffff, 0x00000100,
1503         0x3c210, 0xffffffff, 0x06000100,
1504         0x3c290, 0xffffffff, 0x00000100,
1505         0x3c274, 0xffffffff, 0x00000100,
1506         0x3c2b4, 0xffffffff, 0x00000100,
1507         0x3c2b0, 0xffffffff, 0x00000100,
1508         0x3c270, 0xffffffff, 0x00000100,
1509         0x30800, 0xffffffff, 0xe0000000,
1510         0x3c020, 0xffffffff, 0x00010000,
1511         0x3c024, 0xffffffff, 0x00030002,
1512         0x3c028, 0xffffffff, 0x00040007,
1513         0x3c02c, 0xffffffff, 0x00060005,
1514         0x3c030, 0xffffffff, 0x00090008,
1515         0x3c034, 0xffffffff, 0x00010000,
1516         0x3c038, 0xffffffff, 0x00030002,
1517         0x3c03c, 0xffffffff, 0x00040007,
1518         0x3c040, 0xffffffff, 0x00060005,
1519         0x3c044, 0xffffffff, 0x00090008,
1520         0x3c048, 0xffffffff, 0x00010000,
1521         0x3c04c, 0xffffffff, 0x00030002,
1522         0x3c050, 0xffffffff, 0x00040007,
1523         0x3c054, 0xffffffff, 0x00060005,
1524         0x3c058, 0xffffffff, 0x00090008,
1525         0x3c05c, 0xffffffff, 0x00010000,
1526         0x3c060, 0xffffffff, 0x00030002,
1527         0x3c064, 0xffffffff, 0x00040007,
1528         0x3c068, 0xffffffff, 0x00060005,
1529         0x3c06c, 0xffffffff, 0x00090008,
1530         0x3c070, 0xffffffff, 0x00010000,
1531         0x3c074, 0xffffffff, 0x00030002,
1532         0x3c078, 0xffffffff, 0x00040007,
1533         0x3c07c, 0xffffffff, 0x00060005,
1534         0x3c080, 0xffffffff, 0x00090008,
1535         0x3c084, 0xffffffff, 0x00010000,
1536         0x3c088, 0xffffffff, 0x00030002,
1537         0x3c08c, 0xffffffff, 0x00040007,
1538         0x3c090, 0xffffffff, 0x00060005,
1539         0x3c094, 0xffffffff, 0x00090008,
1540         0x3c098, 0xffffffff, 0x00010000,
1541         0x3c09c, 0xffffffff, 0x00030002,
1542         0x3c0a0, 0xffffffff, 0x00040007,
1543         0x3c0a4, 0xffffffff, 0x00060005,
1544         0x3c0a8, 0xffffffff, 0x00090008,
1545         0x3c0ac, 0xffffffff, 0x00010000,
1546         0x3c0b0, 0xffffffff, 0x00030002,
1547         0x3c0b4, 0xffffffff, 0x00040007,
1548         0x3c0b8, 0xffffffff, 0x00060005,
1549         0x3c0bc, 0xffffffff, 0x00090008,
1550         0x3c0c0, 0xffffffff, 0x00010000,
1551         0x3c0c4, 0xffffffff, 0x00030002,
1552         0x3c0c8, 0xffffffff, 0x00040007,
1553         0x3c0cc, 0xffffffff, 0x00060005,
1554         0x3c0d0, 0xffffffff, 0x00090008,
1555         0x3c0d4, 0xffffffff, 0x00010000,
1556         0x3c0d8, 0xffffffff, 0x00030002,
1557         0x3c0dc, 0xffffffff, 0x00040007,
1558         0x3c0e0, 0xffffffff, 0x00060005,
1559         0x3c0e4, 0xffffffff, 0x00090008,
1560         0x3c0e8, 0xffffffff, 0x00010000,
1561         0x3c0ec, 0xffffffff, 0x00030002,
1562         0x3c0f0, 0xffffffff, 0x00040007,
1563         0x3c0f4, 0xffffffff, 0x00060005,
1564         0x3c0f8, 0xffffffff, 0x00090008,
1565         0xc318, 0xffffffff, 0x00020200,
1566         0x3350, 0xffffffff, 0x00000200,
1567         0x15c0, 0xffffffff, 0x00000400,
1568         0x55e8, 0xffffffff, 0x00000000,
1569         0x2f50, 0xffffffff, 0x00000902,
1570         0x3c000, 0xffffffff, 0x96940200,
1571         0x8708, 0xffffffff, 0x00900100,
1572         0xc424, 0xffffffff, 0x0020003f,
1573         0x38, 0xffffffff, 0x0140001c,
1574         0x3c, 0x000f0000, 0x000f0000,
1575         0x220, 0xffffffff, 0xc060000c,
1576         0x224, 0xc0000fff, 0x00000100,
1577         0xf90, 0xffffffff, 0x00000100,
1578         0xf98, 0x00000101, 0x00000000,
1579         0x20a8, 0xffffffff, 0x00000104,
1580         0x55e4, 0xff000fff, 0x00000100,
1581         0x30cc, 0xc0000fff, 0x00000104,
1582         0xc1e4, 0x00000001, 0x00000001,
1583         0xd00c, 0xff000ff0, 0x00000100,
1584         0xd80c, 0xff000ff0, 0x00000100
1585 };
1586
1587 static const u32 godavari_golden_registers[] =
1588 {
1589         0x55e4, 0xff607fff, 0xfc000100,
1590         0x6ed8, 0x00010101, 0x00010000,
1591         0x9830, 0xffffffff, 0x00000000,
1592         0x98302, 0xf00fffff, 0x00000400,
1593         0x6130, 0xffffffff, 0x00010000,
1594         0x5bb0, 0x000000f0, 0x00000070,
1595         0x5bc0, 0xf0311fff, 0x80300000,
1596         0x98f8, 0x73773777, 0x12010001,
1597         0x98fc, 0xffffffff, 0x00000010,
1598         0x8030, 0x00001f0f, 0x0000100a,
1599         0x2f48, 0x73773777, 0x12010001,
1600         0x2408, 0x000fffff, 0x000c007f,
1601         0x8a14, 0xf000003f, 0x00000007,
1602         0x8b24, 0xffffffff, 0x00ff0fff,
1603         0x30a04, 0x0000ff0f, 0x00000000,
1604         0x28a4c, 0x07ffffff, 0x06000000,
1605         0x4d8, 0x00000fff, 0x00000100,
1606         0xd014, 0x00010000, 0x00810001,
1607         0xd814, 0x00010000, 0x00810001,
1608         0x3e78, 0x00000001, 0x00000002,
1609         0xc768, 0x00000008, 0x00000008,
1610         0xc770, 0x00000f00, 0x00000800,
1611         0xc774, 0x00000f00, 0x00000800,
1612         0xc798, 0x00ffffff, 0x00ff7fbf,
1613         0xc79c, 0x00ffffff, 0x00ff7faf,
1614         0x8c00, 0x000000ff, 0x00000001,
1615         0x214f8, 0x01ff01ff, 0x00000002,
1616         0x21498, 0x007ff800, 0x00200000,
1617         0x2015c, 0xffffffff, 0x00000f40,
1618         0x88c4, 0x001f3ae3, 0x00000082,
1619         0x88d4, 0x0000001f, 0x00000010,
1620         0x30934, 0xffffffff, 0x00000000
1621 };
1622
1623
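/*
 * Each of the golden/init tables above is a flat array of
 * {offset, and_mask, or_value} triplets consumed by
 * radeon_program_register_sequence().  Roughly (a sketch of that
 * helper's behaviour, not a verbatim copy of it):
 *
 *	for (i = 0; i < array_size; i += 3) {
 *		reg      = table[i + 0];
 *		and_mask = table[i + 1];
 *		or_mask  = table[i + 2];
 *		if (and_mask == 0xffffffff)
 *			tmp = or_mask;
 *		else
 *			tmp = (RREG32(reg) & ~and_mask) | (or_mask & and_mask);
 *		WREG32(reg, tmp);
 *	}
 *
 * i.e. the mask selects which bits of the register get overridden with
 * the "golden" value; a mask of 0xffffffff replaces the register outright.
 */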
1624 static void cik_init_golden_registers(struct radeon_device *rdev)
1625 {
1626         /* Some of the registers might be dependent on GRBM_GFX_INDEX */
1627         mutex_lock(&rdev->grbm_idx_mutex);
1628         switch (rdev->family) {
1629         case CHIP_BONAIRE:
1630                 radeon_program_register_sequence(rdev,
1631                                                  bonaire_mgcg_cgcg_init,
1632                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1633                 radeon_program_register_sequence(rdev,
1634                                                  bonaire_golden_registers,
1635                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1636                 radeon_program_register_sequence(rdev,
1637                                                  bonaire_golden_common_registers,
1638                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1639                 radeon_program_register_sequence(rdev,
1640                                                  bonaire_golden_spm_registers,
1641                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1642                 break;
1643         case CHIP_KABINI:
1644                 radeon_program_register_sequence(rdev,
1645                                                  kalindi_mgcg_cgcg_init,
1646                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1647                 radeon_program_register_sequence(rdev,
1648                                                  kalindi_golden_registers,
1649                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1650                 radeon_program_register_sequence(rdev,
1651                                                  kalindi_golden_common_registers,
1652                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1653                 radeon_program_register_sequence(rdev,
1654                                                  kalindi_golden_spm_registers,
1655                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1656                 break;
1657         case CHIP_MULLINS:
1658                 radeon_program_register_sequence(rdev,
1659                                                  kalindi_mgcg_cgcg_init,
1660                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1661                 radeon_program_register_sequence(rdev,
1662                                                  godavari_golden_registers,
1663                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1664                 radeon_program_register_sequence(rdev,
1665                                                  kalindi_golden_common_registers,
1666                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1667                 radeon_program_register_sequence(rdev,
1668                                                  kalindi_golden_spm_registers,
1669                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1670                 break;
1671         case CHIP_KAVERI:
1672                 radeon_program_register_sequence(rdev,
1673                                                  spectre_mgcg_cgcg_init,
1674                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1675                 radeon_program_register_sequence(rdev,
1676                                                  spectre_golden_registers,
1677                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1678                 radeon_program_register_sequence(rdev,
1679                                                  spectre_golden_common_registers,
1680                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1681                 radeon_program_register_sequence(rdev,
1682                                                  spectre_golden_spm_registers,
1683                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1684                 break;
1685         case CHIP_HAWAII:
1686                 radeon_program_register_sequence(rdev,
1687                                                  hawaii_mgcg_cgcg_init,
1688                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1689                 radeon_program_register_sequence(rdev,
1690                                                  hawaii_golden_registers,
1691                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1692                 radeon_program_register_sequence(rdev,
1693                                                  hawaii_golden_common_registers,
1694                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1695                 radeon_program_register_sequence(rdev,
1696                                                  hawaii_golden_spm_registers,
1697                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1698                 break;
1699         default:
1700                 break;
1701         }
1702         mutex_unlock(&rdev->grbm_idx_mutex);
1703 }
1704
1705 /**
1706  * cik_get_xclk - get the xclk
1707  *
1708  * @rdev: radeon_device pointer
1709  *
1710  * Returns the reference clock used by the gfx engine
1711  * (CIK).
1712  */
1713 u32 cik_get_xclk(struct radeon_device *rdev)
1714 {
1715         u32 reference_clock = rdev->clock.spll.reference_freq;
1716
1717         if (rdev->flags & RADEON_IS_IGP) {
1718                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1719                         return reference_clock / 2;
1720         } else {
1721                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1722                         return reference_clock / 4;
1723         }
1724         return reference_clock;
1725 }
1726
1727 /**
1728  * cik_mm_rdoorbell - read a doorbell dword
1729  *
1730  * @rdev: radeon_device pointer
1731  * @index: doorbell index
1732  *
1733  * Returns the value in the doorbell aperture at the
1734  * requested doorbell index (CIK).
1735  */
1736 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1737 {
1738         if (index < rdev->doorbell.num_doorbells) {
1739                 return readl(rdev->doorbell.ptr + index);
1740         } else {
1741                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1742                 return 0;
1743         }
1744 }
1745
1746 /**
1747  * cik_mm_wdoorbell - write a doorbell dword
1748  *
1749  * @rdev: radeon_device pointer
1750  * @index: doorbell index
1751  * @v: value to write
1752  *
1753  * Writes @v to the doorbell aperture at the
1754  * requested doorbell index (CIK).
1755  */
1756 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1757 {
1758         if (index < rdev->doorbell.num_doorbells) {
1759                 writel(v, rdev->doorbell.ptr + index);
1760         } else {
1761                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1762         }
1763 }
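/*
 * Illustrative use (a sketch; the real callers live in the ring code):
 * the compute rings push write-pointer updates through a doorbell slot,
 * e.g. something like
 *
 *	cik_mm_wdoorbell(rdev, ring->doorbell_index, ring->wptr);
 *
 * Assuming doorbell.ptr is a u32 __iomem pointer (as declared in
 * radeon.h), the index here selects a 32-bit doorbell slot, not a byte
 * offset into the doorbell BAR.
 */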
1764
1765 #define BONAIRE_IO_MC_REGS_SIZE 36
1766
1767 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1768 {
1769         {0x00000070, 0x04400000},
1770         {0x00000071, 0x80c01803},
1771         {0x00000072, 0x00004004},
1772         {0x00000073, 0x00000100},
1773         {0x00000074, 0x00ff0000},
1774         {0x00000075, 0x34000000},
1775         {0x00000076, 0x08000014},
1776         {0x00000077, 0x00cc08ec},
1777         {0x00000078, 0x00000400},
1778         {0x00000079, 0x00000000},
1779         {0x0000007a, 0x04090000},
1780         {0x0000007c, 0x00000000},
1781         {0x0000007e, 0x4408a8e8},
1782         {0x0000007f, 0x00000304},
1783         {0x00000080, 0x00000000},
1784         {0x00000082, 0x00000001},
1785         {0x00000083, 0x00000002},
1786         {0x00000084, 0xf3e4f400},
1787         {0x00000085, 0x052024e3},
1788         {0x00000087, 0x00000000},
1789         {0x00000088, 0x01000000},
1790         {0x0000008a, 0x1c0a0000},
1791         {0x0000008b, 0xff010000},
1792         {0x0000008d, 0xffffefff},
1793         {0x0000008e, 0xfff3efff},
1794         {0x0000008f, 0xfff3efbf},
1795         {0x00000092, 0xf7ffffff},
1796         {0x00000093, 0xffffff7f},
1797         {0x00000095, 0x00101101},
1798         {0x00000096, 0x00000fff},
1799         {0x00000097, 0x00116fff},
1800         {0x00000098, 0x60010000},
1801         {0x00000099, 0x10010000},
1802         {0x0000009a, 0x00006000},
1803         {0x0000009b, 0x00001000},
1804         {0x0000009f, 0x00b48000}
1805 };
1806
1807 #define HAWAII_IO_MC_REGS_SIZE 22
1808
1809 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1810 {
1811         {0x0000007d, 0x40000000},
1812         {0x0000007e, 0x40180304},
1813         {0x0000007f, 0x0000ff00},
1814         {0x00000081, 0x00000000},
1815         {0x00000083, 0x00000800},
1816         {0x00000086, 0x00000000},
1817         {0x00000087, 0x00000100},
1818         {0x00000088, 0x00020100},
1819         {0x00000089, 0x00000000},
1820         {0x0000008b, 0x00040000},
1821         {0x0000008c, 0x00000100},
1822         {0x0000008e, 0xff010000},
1823         {0x00000090, 0xffffefff},
1824         {0x00000091, 0xfff3efff},
1825         {0x00000092, 0xfff3efbf},
1826         {0x00000093, 0xf7ffffff},
1827         {0x00000094, 0xffffff7f},
1828         {0x00000095, 0x00000fff},
1829         {0x00000096, 0x00116fff},
1830         {0x00000097, 0x60010000},
1831         {0x00000098, 0x10010000},
1832         {0x0000009f, 0x00c79000}
1833 };
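/*
 * The *_io_mc_regs tables are {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA}
 * pairs: ci_mc_load_microcode() below writes the first word of each pair
 * to the index register and the second to the data register before it
 * uploads the MC ucode proper.
 */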
1834
1835
1836 /**
1837  * cik_srbm_select - select specific register instances
1838  *
1839  * @rdev: radeon_device pointer
1840  * @me: selected ME (micro engine)
1841  * @pipe: pipe
1842  * @queue: queue
1843  * @vmid: VMID
1844  *
1845  * Switches the currently active register instances.  Some
1846  * registers are instanced per VMID, others are instanced per
1847  * me/pipe/queue combination.
1848  */
1849 static void cik_srbm_select(struct radeon_device *rdev,
1850                             u32 me, u32 pipe, u32 queue, u32 vmid)
1851 {
1852         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1853                              MEID(me & 0x3) |
1854                              VMID(vmid & 0xf) |
1855                              QUEUEID(queue & 0x7));
1856         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1857 }
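/*
 * Typical usage (a sketch): callers take srbm_mutex, select the instance
 * they want to touch, program the instanced registers, then switch back
 * to instance 0 before dropping the lock, e.g.
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, vmid);
 *	... program per-instance registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */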
1858
1859 /* ucode loading */
1860 /**
1861  * ci_mc_load_microcode - load MC ucode into the hw
1862  *
1863  * @rdev: radeon_device pointer
1864  *
1865  * Load the GDDR MC ucode into the hw (CIK).
1866  * Returns 0 on success, error on failure.
1867  */
1868 int ci_mc_load_microcode(struct radeon_device *rdev)
1869 {
1870         const __be32 *fw_data = NULL;
1871         const __le32 *new_fw_data = NULL;
1872         u32 running, blackout = 0, tmp;
1873         u32 *io_mc_regs = NULL;
1874         const __le32 *new_io_mc_regs = NULL;
1875         int i, regs_size, ucode_size;
1876
1877         if (!rdev->mc_fw)
1878                 return -EINVAL;
1879
1880         if (rdev->new_fw) {
1881                 const struct mc_firmware_header_v1_0 *hdr =
1882                         (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1883
1884                 radeon_ucode_print_mc_hdr(&hdr->header);
1885
1886                 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1887                 new_io_mc_regs = (const __le32 *)
1888                         (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1889                 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1890                 new_fw_data = (const __le32 *)
1891                         (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1892         } else {
1893                 ucode_size = rdev->mc_fw->size / 4;
1894
1895                 switch (rdev->family) {
1896                 case CHIP_BONAIRE:
1897                         io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1898                         regs_size = BONAIRE_IO_MC_REGS_SIZE;
1899                         break;
1900                 case CHIP_HAWAII:
1901                         io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1902                         regs_size = HAWAII_IO_MC_REGS_SIZE;
1903                         break;
1904                 default:
1905                         return -EINVAL;
1906                 }
1907                 fw_data = (const __be32 *)rdev->mc_fw->data;
1908         }
1909
1910         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1911
1912         if (running == 0) {
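                /*
                 * Note: 'running' is known to be zero inside this block, so
                 * the blackout save below (and the matching restore at the end
                 * of the block) never executes; it appears to be carried over
                 * from the older MC load helpers and is left as-is here.
                 */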
1913                 if (running) {
1914                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1915                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1916                 }
1917
1918                 /* reset the engine and set to writable */
1919                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1920                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1921
1922                 /* load mc io regs */
1923                 for (i = 0; i < regs_size; i++) {
1924                         if (rdev->new_fw) {
1925                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1926                                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1927                         } else {
1928                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1929                                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1930                         }
1931                 }
1932
1933                 tmp = RREG32(MC_SEQ_MISC0);
1934                 if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1935                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1936                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1937                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1938                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1939                 }
1940
1941                 /* load the MC ucode */
1942                 for (i = 0; i < ucode_size; i++) {
1943                         if (rdev->new_fw)
1944                                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1945                         else
1946                                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1947                 }
1948
1949                 /* put the engine back into the active state */
1950                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1951                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1952                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1953
1954                 /* wait for training to complete */
1955                 for (i = 0; i < rdev->usec_timeout; i++) {
1956                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1957                                 break;
1958                         udelay(1);
1959                 }
1960                 for (i = 0; i < rdev->usec_timeout; i++) {
1961                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1962                                 break;
1963                         udelay(1);
1964                 }
1965
1966                 if (running)
1967                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1968         }
1969
1970         return 0;
1971 }
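/*
 * Illustrative call site (a sketch of how the startup path is expected to
 * use this, not a copy of it):
 *
 *	r = ci_mc_load_microcode(rdev);
 *	if (r) {
 *		DRM_ERROR("Failed to load MC firmware!\n");
 *		return r;
 *	}
 */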
1972
1973 /**
1974  * cik_init_microcode - load ucode images from disk
1975  *
1976  * @rdev: radeon_device pointer
1977  *
1978  * Use the firmware interface to load the ucode images into
1979  * the driver (not loaded into hw).
1980  * Returns 0 on success, error on failure.
1981  */
1982 static int cik_init_microcode(struct radeon_device *rdev)
1983 {
1984         const char *chip_name;
1985         const char *new_chip_name;
1986         size_t pfp_req_size, me_req_size, ce_req_size,
1987                 mec_req_size, rlc_req_size, mc_req_size = 0,
1988                 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1989         char fw_name[30];
1990         int new_fw = 0;
1991         int err;
1992         int num_fw;
1993
1994         DRM_DEBUG("\n");
1995
1996         switch (rdev->family) {
1997         case CHIP_BONAIRE:
1998                 chip_name = "BONAIRE";
1999                 new_chip_name = "bonaire";
2000                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2001                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2002                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2003                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2004                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2005                 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2006                 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2007                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2008                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2009                 num_fw = 8;
2010                 break;
2011         case CHIP_HAWAII:
2012                 chip_name = "HAWAII";
2013                 new_chip_name = "hawaii";
2014                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2015                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2016                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2017                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2018                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2019                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2020                 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2021                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2022                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2023                 num_fw = 8;
2024                 break;
2025         case CHIP_KAVERI:
2026                 chip_name = "KAVERI";
2027                 new_chip_name = "kaveri";
2028                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2029                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2030                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2031                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2032                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2033                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2034                 num_fw = 7;
2035                 break;
2036         case CHIP_KABINI:
2037                 chip_name = "KABINI";
2038                 new_chip_name = "kabini";
2039                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2040                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2041                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2042                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2043                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2044                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2045                 num_fw = 6;
2046                 break;
2047         case CHIP_MULLINS:
2048                 chip_name = "MULLINS";
2049                 new_chip_name = "mullins";
2050                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2051                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2052                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2053                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2054                 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2055                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2056                 num_fw = 6;
2057                 break;
2058         default: BUG();
2059         }
2060
2061         DRM_INFO("Loading %s Microcode\n", new_chip_name);
2062
2063         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2064         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2065         if (err) {
2066                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2067                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2068                 if (err)
2069                         goto out;
2070                 if (rdev->pfp_fw->size != pfp_req_size) {
2071                         printk(KERN_ERR
2072                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2073                                rdev->pfp_fw->size, fw_name);
2074                         err = -EINVAL;
2075                         goto out;
2076                 }
2077         } else {
2078                 err = radeon_ucode_validate(rdev->pfp_fw);
2079                 if (err) {
2080                         printk(KERN_ERR
2081                                "cik_fw: validation failed for firmware \"%s\"\n",
2082                                fw_name);
2083                         goto out;
2084                 } else {
2085                         new_fw++;
2086                 }
2087         }
2088
2089         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2090         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2091         if (err) {
2092                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2093                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2094                 if (err)
2095                         goto out;
2096                 if (rdev->me_fw->size != me_req_size) {
2097                         printk(KERN_ERR
2098                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2099                                rdev->me_fw->size, fw_name);
2100                         err = -EINVAL;
2101                 }
2102         } else {
2103                 err = radeon_ucode_validate(rdev->me_fw);
2104                 if (err) {
2105                         printk(KERN_ERR
2106                                "cik_fw: validation failed for firmware \"%s\"\n",
2107                                fw_name);
2108                         goto out;
2109                 } else {
2110                         new_fw++;
2111                 }
2112         }
2113
2114         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2115         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2116         if (err) {
2117                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2118                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2119                 if (err)
2120                         goto out;
2121                 if (rdev->ce_fw->size != ce_req_size) {
2122                         printk(KERN_ERR
2123                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2124                                rdev->ce_fw->size, fw_name);
2125                         err = -EINVAL;
2126                 }
2127         } else {
2128                 err = radeon_ucode_validate(rdev->ce_fw);
2129                 if (err) {
2130                         printk(KERN_ERR
2131                                "cik_fw: validation failed for firmware \"%s\"\n",
2132                                fw_name);
2133                         goto out;
2134                 } else {
2135                         new_fw++;
2136                 }
2137         }
2138
2139         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2140         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2141         if (err) {
2142                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2143                 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2144                 if (err)
2145                         goto out;
2146                 if (rdev->mec_fw->size != mec_req_size) {
2147                         printk(KERN_ERR
2148                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2149                                rdev->mec_fw->size, fw_name);
2150                         err = -EINVAL;
2151                 }
2152         } else {
2153                 err = radeon_ucode_validate(rdev->mec_fw);
2154                 if (err) {
2155                         printk(KERN_ERR
2156                                "cik_fw: validation failed for firmware \"%s\"\n",
2157                                fw_name);
2158                         goto out;
2159                 } else {
2160                         new_fw++;
2161                 }
2162         }
2163
2164         if (rdev->family == CHIP_KAVERI) {
2165                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2166                 err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2167                 if (err) {
2168                         goto out;
2169                 } else {
2170                         err = radeon_ucode_validate(rdev->mec2_fw);
2171                         if (err) {
2172                                 goto out;
2173                         } else {
2174                                 new_fw++;
2175                         }
2176                 }
2177         }
2178
2179         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2180         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2181         if (err) {
2182                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2183                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2184                 if (err)
2185                         goto out;
2186                 if (rdev->rlc_fw->size != rlc_req_size) {
2187                         printk(KERN_ERR
2188                                "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2189                                rdev->rlc_fw->size, fw_name);
2190                         err = -EINVAL;
2191                 }
2192         } else {
2193                 err = radeon_ucode_validate(rdev->rlc_fw);
2194                 if (err) {
2195                         printk(KERN_ERR
2196                                "cik_fw: validation failed for firmware \"%s\"\n",
2197                                fw_name);
2198                         goto out;
2199                 } else {
2200                         new_fw++;
2201                 }
2202         }
2203
2204         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2205         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2206         if (err) {
2207                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2208                 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2209                 if (err)
2210                         goto out;
2211                 if (rdev->sdma_fw->size != sdma_req_size) {
2212                         printk(KERN_ERR
2213                                "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2214                                rdev->sdma_fw->size, fw_name);
2215                         err = -EINVAL;
2216                 }
2217         } else {
2218                 err = radeon_ucode_validate(rdev->sdma_fw);
2219                 if (err) {
2220                         printk(KERN_ERR
2221                                "cik_fw: validation failed for firmware \"%s\"\n",
2222                                fw_name);
2223                         goto out;
2224                 } else {
2225                         new_fw++;
2226                 }
2227         }
2228
2229         /* No MC or SMC ucode on APUs */
2230         if (!(rdev->flags & RADEON_IS_IGP)) {
2231                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2232                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2233                 if (err) {
2234                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2235                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2236                         if (err) {
2237                                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2238                                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2239                                 if (err)
2240                                         goto out;
2241                         }
2242                         if ((rdev->mc_fw->size != mc_req_size) &&
2243                             (rdev->mc_fw->size != mc2_req_size)){
2244                                 printk(KERN_ERR
2245                                        "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2246                                        rdev->mc_fw->size, fw_name);
2247                                 err = -EINVAL;
2248                         }
2249                         DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2250                 } else {
2251                         err = radeon_ucode_validate(rdev->mc_fw);
2252                         if (err) {
2253                                 printk(KERN_ERR
2254                                        "cik_fw: validation failed for firmware \"%s\"\n",
2255                                        fw_name);
2256                                 goto out;
2257                         } else {
2258                                 new_fw++;
2259                         }
2260                 }
2261
2262                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2263                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2264                 if (err) {
2265                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2266                         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2267                         if (err) {
2268                                 printk(KERN_ERR
2269                                        "smc: error loading firmware \"%s\"\n",
2270                                        fw_name);
2271                                 release_firmware(rdev->smc_fw);
2272                                 rdev->smc_fw = NULL;
2273                                 err = 0;
2274                         } else if (rdev->smc_fw->size != smc_req_size) {
2275                                 printk(KERN_ERR
2276                                        "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2277                                        rdev->smc_fw->size, fw_name);
2278                                 err = -EINVAL;
2279                         }
2280                 } else {
2281                         err = radeon_ucode_validate(rdev->smc_fw);
2282                         if (err) {
2283                                 printk(KERN_ERR
2284                                        "cik_fw: validation failed for firmware \"%s\"\n",
2285                                        fw_name);
2286                                 goto out;
2287                         } else {
2288                                 new_fw++;
2289                         }
2290                 }
2291         }
2292
2293         if (new_fw == 0) {
2294                 rdev->new_fw = false;
2295         } else if (new_fw < num_fw) {
2296                 printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2297                 err = -EINVAL;
2298         } else {
2299                 rdev->new_fw = true;
2300         }
2301
2302 out:
2303         if (err) {
2304                 if (err != -EINVAL)
2305                         printk(KERN_ERR
2306                                "cik_cp: Failed to load firmware \"%s\"\n",
2307                                fw_name);
2308                 release_firmware(rdev->pfp_fw);
2309                 rdev->pfp_fw = NULL;
2310                 release_firmware(rdev->me_fw);
2311                 rdev->me_fw = NULL;
2312                 release_firmware(rdev->ce_fw);
2313                 rdev->ce_fw = NULL;
2314                 release_firmware(rdev->mec_fw);
2315                 rdev->mec_fw = NULL;
2316                 release_firmware(rdev->mec2_fw);
2317                 rdev->mec2_fw = NULL;
2318                 release_firmware(rdev->rlc_fw);
2319                 rdev->rlc_fw = NULL;
2320                 release_firmware(rdev->sdma_fw);
2321                 rdev->sdma_fw = NULL;
2322                 release_firmware(rdev->mc_fw);
2323                 rdev->mc_fw = NULL;
2324                 release_firmware(rdev->smc_fw);
2325                 rdev->smc_fw = NULL;
2326         }
2327         return err;
2328 }
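/*
 * Firmware naming note: the lowercase "radeon/<chip>_*.bin" images carry the
 * newer common ucode header and are checked with radeon_ucode_validate(); the
 * uppercase legacy "radeon/<CHIP>_*.bin" images are raw blobs checked only by
 * size.  new_fw counts how many of the expected images (num_fw) arrived in the
 * new format, which is why mixing the two styles is rejected above.
 */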
2329
2330 /*
2331  * Core functions
2332  */
2333 /**
2334  * cik_tiling_mode_table_init - init the hw tiling table
2335  *
2336  * @rdev: radeon_device pointer
2337  *
2338  * Starting with SI, the tiling setup is done globally in a
2339  * set of 32 tiling modes.  Rather than selecting each set of
2340  * parameters per surface as on older asics, we just select
2341  * which index in the tiling table we want to use, and the
2342  * surface uses those parameters (CIK).
2343  */
2344 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2345 {
2346         const u32 num_tile_mode_states = 32;
2347         const u32 num_secondary_tile_mode_states = 16;
2348         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2349         u32 num_pipe_configs;
2350         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2351                 rdev->config.cik.max_shader_engines;
2352
2353         switch (rdev->config.cik.mem_row_size_in_kb) {
2354         case 1:
2355                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2356                 break;
2357         case 2:
2358         default:
2359                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2360                 break;
2361         case 4:
2362                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2363                 break;
2364         }
2365
2366         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2367         if (num_pipe_configs > 8)
2368                 num_pipe_configs = 16;
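        /*
         * CIK parts either have at most 8 tile pipes or, in Hawaii's case,
         * 16, so anything above 8 is handled with the 16-pipe
         * (ADDR_SURF_P16_*) layouts below.
         */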
2369
2370         if (num_pipe_configs == 16) {
2371                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2372                         switch (reg_offset) {
2373                         case 0:
2374                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2376                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2378                                 break;
2379                         case 1:
2380                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2381                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2382                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2384                                 break;
2385                         case 2:
2386                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2388                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2389                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2390                                 break;
2391                         case 3:
2392                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2393                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2394                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2396                                 break;
2397                         case 4:
2398                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2399                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2400                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401                                                  TILE_SPLIT(split_equal_to_row_size));
2402                                 break;
2403                         case 5:
2404                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2405                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2407                                 break;
2408                         case 6:
2409                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2410                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2411                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2412                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2413                                 break;
2414                         case 7:
2415                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2416                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2417                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418                                                  TILE_SPLIT(split_equal_to_row_size));
2419                                 break;
2420                         case 8:
2421                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2422                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2423                                 break;
2424                         case 9:
2425                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2426                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2427                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2428                                 break;
2429                         case 10:
2430                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2431                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2432                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434                                 break;
2435                         case 11:
2436                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2438                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2439                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440                                 break;
2441                         case 12:
2442                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2443                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2444                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2445                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2446                                 break;
2447                         case 13:
2448                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2449                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2451                                 break;
2452                         case 14:
2453                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2454                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2455                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2456                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2457                                 break;
2458                         case 16:
2459                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2460                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2461                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2462                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2463                                 break;
2464                         case 17:
2465                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2466                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2467                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2468                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2469                                 break;
2470                         case 27:
2471                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2472                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2473                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2474                                 break;
2475                         case 28:
2476                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2477                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2478                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2479                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2480                                 break;
2481                         case 29:
2482                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2483                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2484                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2485                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2486                                 break;
2487                         case 30:
2488                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2489                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2490                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2491                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2492                                 break;
2493                         default:
2494                                 gb_tile_moden = 0;
2495                                 break;
2496                         }
2497                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2498                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2499                 }
2500                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2501                         switch (reg_offset) {
2502                         case 0:
2503                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2504                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2505                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2506                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2507                                 break;
2508                         case 1:
2509                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2511                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2512                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2513                                 break;
2514                         case 2:
2515                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2517                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2518                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2519                                 break;
2520                         case 3:
2521                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2523                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2524                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2525                                 break;
2526                         case 4:
2527                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2529                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2530                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2531                                 break;
2532                         case 5:
2533                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2534                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2535                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2536                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2537                                 break;
2538                         case 6:
2539                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2541                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2542                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2543                                 break;
2544                         case 8:
2545                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2546                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2547                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2548                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2549                                 break;
2550                         case 9:
2551                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2553                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2554                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2555                                 break;
2556                         case 10:
2557                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2559                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2560                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2561                                 break;
2562                         case 11:
2563                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2564                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2565                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2566                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2567                                 break;
2568                         case 12:
2569                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2571                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2572                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2573                                 break;
2574                         case 13:
2575                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2576                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2577                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2578                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2579                                 break;
2580                         case 14:
2581                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2582                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2583                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2584                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2585                                 break;
2586                         default:
2587                                 gb_tile_moden = 0;
2588                                 break;
2589                         }
2590                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2591                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2592                 }
2593         } else if (num_pipe_configs == 8) {
2594                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2595                         switch (reg_offset) {
2596                         case 0:
2597                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2598                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2599                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2600                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2601                                 break;
2602                         case 1:
2603                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2605                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2606                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2607                                 break;
2608                         case 2:
2609                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2610                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2611                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2612                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2613                                 break;
2614                         case 3:
2615                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2616                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2617                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2618                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2619                                 break;
2620                         case 4:
2621                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2622                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2623                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2624                                                  TILE_SPLIT(split_equal_to_row_size));
2625                                 break;
2626                         case 5:
2627                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2628                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2629                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2630                                 break;
2631                         case 6:
2632                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2633                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2634                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2635                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2636                                 break;
2637                         case 7:
2638                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2639                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2640                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2641                                                  TILE_SPLIT(split_equal_to_row_size));
2642                                 break;
2643                         case 8:
2644                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2645                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2646                                 break;
2647                         case 9:
2648                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2649                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2650                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2651                                 break;
2652                         case 10:
2653                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2655                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2656                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2657                                 break;
2658                         case 11:
2659                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2660                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2661                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2662                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2663                                 break;
2664                         case 12:
2665                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2666                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2667                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2668                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2669                                 break;
2670                         case 13:
2671                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2672                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2673                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2674                                 break;
2675                         case 14:
2676                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2677                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2678                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2679                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2680                                 break;
2681                         case 16:
2682                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2683                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2684                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2685                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2686                                 break;
2687                         case 17:
2688                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2689                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2690                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2691                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2692                                 break;
2693                         case 27:
2694                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2695                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2696                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2697                                 break;
2698                         case 28:
2699                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2700                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2701                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2702                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2703                                 break;
2704                         case 29:
2705                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2706                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2707                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2708                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2709                                 break;
2710                         case 30:
2711                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2712                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2713                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2714                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715                                 break;
2716                         default:
2717                                 gb_tile_moden = 0;
2718                                 break;
2719                         }
2720                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2721                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2722                 }
2723                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2724                         switch (reg_offset) {
2725                         case 0:
2726                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2728                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2729                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2730                                 break;
2731                         case 1:
2732                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2734                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2735                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2736                                 break;
2737                         case 2:
2738                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2739                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2740                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2741                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2742                                 break;
2743                         case 3:
2744                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2746                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2747                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2748                                 break;
2749                         case 4:
2750                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2752                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2753                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2754                                 break;
2755                         case 5:
2756                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2758                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2759                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2760                                 break;
2761                         case 6:
2762                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2764                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2765                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2766                                 break;
2767                         case 8:
2768                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2770                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2771                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2772                                 break;
2773                         case 9:
2774                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2775                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2776                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2777                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2778                                 break;
2779                         case 10:
2780                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2782                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2783                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2784                                 break;
2785                         case 11:
2786                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2787                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2788                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2789                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2790                                 break;
2791                         case 12:
2792                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2793                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2794                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2795                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2796                                 break;
2797                         case 13:
2798                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2799                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2800                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2801                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2802                                 break;
2803                         case 14:
2804                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2805                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2806                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2807                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2808                                 break;
2809                         default:
2810                                 gb_tile_moden = 0;
2811                                 break;
2812                         }
2813                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2814                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2815                 }
2816         } else if (num_pipe_configs == 4) {
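                /* 4-pipe parts: the tile-mode pipe config depends on the number
                 * of render backends (P4_16x16 with 4 RBs, P4_8x16 otherwise).
                 */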
2817                 if (num_rbs == 4) {
2818                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2819                                 switch (reg_offset) {
2820                                 case 0:
2821                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2822                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2823                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2824                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2825                                         break;
2826                                 case 1:
2827                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2828                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2829                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2830                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2831                                         break;
2832                                 case 2:
2833                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2834                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2835                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2836                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2837                                         break;
2838                                 case 3:
2839                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2840                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2841                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2842                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2843                                         break;
2844                                 case 4:
2845                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2846                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2847                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2848                                                          TILE_SPLIT(split_equal_to_row_size));
2849                                         break;
2850                                 case 5:
2851                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2852                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2853                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2854                                         break;
2855                                 case 6:
2856                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2857                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2858                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2859                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2860                                         break;
2861                                 case 7:
2862                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2863                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2864                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2865                                                          TILE_SPLIT(split_equal_to_row_size));
2866                                         break;
2867                                 case 8:
2868                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2869                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2870                                         break;
2871                                 case 9:
2872                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2873                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2874                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2875                                         break;
2876                                 case 10:
2877                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2878                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2879                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2880                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2881                                         break;
2882                                 case 11:
2883                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2884                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2885                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2886                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2887                                         break;
2888                                 case 12:
2889                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2890                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2891                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2892                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2893                                         break;
2894                                 case 13:
2895                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2896                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2897                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2898                                         break;
2899                                 case 14:
2900                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2902                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2903                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2904                                         break;
2905                                 case 16:
2906                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2907                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2908                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2909                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2910                                         break;
2911                                 case 17:
2912                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2913                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2914                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2915                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2916                                         break;
2917                                 case 27:
2918                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2919                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2920                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2921                                         break;
2922                                 case 28:
2923                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2924                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2925                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2926                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2927                                         break;
2928                                 case 29:
2929                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2930                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2931                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2932                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2933                                         break;
2934                                 case 30:
2935                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2936                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2937                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2938                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2939                                         break;
2940                                 default:
2941                                         gb_tile_moden = 0;
2942                                         break;
2943                                 }
2944                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2945                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2946                         }
2947                 } else if (num_rbs < 4) {
2948                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2949                                 switch (reg_offset) {
2950                                 case 0:
2951                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2952                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2953                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2954                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2955                                         break;
2956                                 case 1:
2957                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2958                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2959                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2960                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2961                                         break;
2962                                 case 2:
2963                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2964                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2965                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2966                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2967                                         break;
2968                                 case 3:
2969                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2970                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2971                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2972                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2973                                         break;
2974                                 case 4:
2975                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2976                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2977                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2978                                                          TILE_SPLIT(split_equal_to_row_size));
2979                                         break;
2980                                 case 5:
2981                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2982                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2983                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2984                                         break;
2985                                 case 6:
2986                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2987                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2988                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2989                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2990                                         break;
2991                                 case 7:
2992                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2993                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2994                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2995                                                          TILE_SPLIT(split_equal_to_row_size));
2996                                         break;
2997                                 case 8:
2998                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2999                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16));
3000                                         break;
3001                                 case 9:
3002                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3003                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3004                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
3005                                         break;
3006                                 case 10:
3007                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3008                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3009                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3010                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3011                                         break;
3012                                 case 11:
3013                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3014                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3015                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3016                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3017                                         break;
3018                                 case 12:
3019                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3020                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3021                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3022                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3023                                         break;
3024                                 case 13:
3025                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3026                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3027                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3028                                         break;
3029                                 case 14:
3030                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3031                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3032                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3033                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3034                                         break;
3035                                 case 16:
3036                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3037                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3038                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3039                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3040                                         break;
3041                                 case 17:
3042                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3043                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3044                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3045                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3046                                         break;
3047                                 case 27:
3048                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3049                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3050                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
3051                                         break;
3052                                 case 28:
3053                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3054                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3055                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3056                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3057                                         break;
3058                                 case 29:
3059                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3060                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3061                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3062                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3063                                         break;
3064                                 case 30:
3065                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3066                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3067                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3068                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3069                                         break;
3070                                 default:
3071                                         gb_tile_moden = 0;
3072                                         break;
3073                                 }
3074                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3075                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3076                         }
3077                 }
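                /* The macrotile (bank) settings below are common to both RB counts. */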
3078                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3079                         switch (reg_offset) {
3080                         case 0:
3081                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3082                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3083                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3084                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3085                                 break;
3086                         case 1:
3087                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3088                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3089                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3090                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3091                                 break;
3092                         case 2:
3093                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3094                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3095                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3096                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3097                                 break;
3098                         case 3:
3099                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3100                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3101                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3102                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3103                                 break;
3104                         case 4:
3105                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3106                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3107                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3108                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3109                                 break;
3110                         case 5:
3111                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3112                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3113                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3114                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3115                                 break;
3116                         case 6:
3117                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3118                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3119                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3120                                                  NUM_BANKS(ADDR_SURF_4_BANK));
3121                                 break;
3122                         case 8:
3123                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3124                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3125                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3126                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3127                                 break;
3128                         case 9:
3129                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3130                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3131                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3132                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3133                                 break;
3134                         case 10:
3135                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3136                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3137                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3138                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3139                                 break;
3140                         case 11:
3141                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3142                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3143                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3144                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3145                                 break;
3146                         case 12:
3147                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3148                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3149                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3150                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3151                                 break;
3152                         case 13:
3153                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3154                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3155                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3156                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3157                                 break;
3158                         case 14:
3159                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3160                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3161                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3162                                                  NUM_BANKS(ADDR_SURF_4_BANK));
3163                                 break;
3164                         default:
3165                                 gb_tile_moden = 0;
3166                                 break;
3167                         }
3168                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3169                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3170                 }
3171         } else if (num_pipe_configs == 2) {
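                /* 2-pipe parts: every tile mode uses the ADDR_SURF_P2 pipe config. */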
3172                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3173                         switch (reg_offset) {
3174                         case 0:
3175                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3176                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3177                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3178                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3179                                 break;
3180                         case 1:
3181                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3182                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3183                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3184                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3185                                 break;
3186                         case 2:
3187                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3188                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3189                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3190                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3191                                 break;
3192                         case 3:
3193                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3194                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3195                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3196                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3197                                 break;
3198                         case 4:
3199                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3200                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3201                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3202                                                  TILE_SPLIT(split_equal_to_row_size));
3203                                 break;
3204                         case 5:
3205                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3206                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3207                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3208                                 break;
3209                         case 6:
3210                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3211                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3212                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3213                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3214                                 break;
3215                         case 7:
3216                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3217                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3218                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3219                                                  TILE_SPLIT(split_equal_to_row_size));
3220                                 break;
3221                         case 8:
3222                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3223                                                  PIPE_CONFIG(ADDR_SURF_P2));
3224                                 break;
3225                         case 9:
3226                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3227                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3228                                                  PIPE_CONFIG(ADDR_SURF_P2));
3229                                 break;
3230                         case 10:
3231                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3232                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3233                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3234                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3235                                 break;
3236                         case 11:
3237                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3238                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3239                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3240                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3241                                 break;
3242                         case 12:
3243                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3244                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3245                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3246                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3247                                 break;
3248                         case 13:
3249                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3250                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3251                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3252                                 break;
3253                         case 14:
3254                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3255                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3256                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3257                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3258                                 break;
3259                         case 16:
3260                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3261                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3262                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3263                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3264                                 break;
3265                         case 17:
3266                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3267                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3268                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3269                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3270                                 break;
3271                         case 27:
3272                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3273                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3274                                                  PIPE_CONFIG(ADDR_SURF_P2));
3275                                 break;
3276                         case 28:
3277                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3278                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3279                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3280                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3281                                 break;
3282                         case 29:
3283                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3284                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3285                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3286                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3287                                 break;
3288                         case 30:
3289                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3290                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3291                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3292                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3293                                 break;
3294                         default:
3295                                 gb_tile_moden = 0;
3296                                 break;
3297                         }
3298                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3299                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3300                 }
3301                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3302                         switch (reg_offset) {
3303                         case 0:
3304                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3305                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3306                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3308                                 break;
3309                         case 1:
3310                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3311                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3312                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3313                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3314                                 break;
3315                         case 2:
3316                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3318                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3319                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3320                                 break;
3321                         case 3:
3322                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3323                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3324                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3325                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3326                                 break;
3327                         case 4:
3328                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3329                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3330                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3331                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3332                                 break;
3333                         case 5:
3334                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3335                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3336                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3337                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3338                                 break;
3339                         case 6:
3340                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3341                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3342                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3343                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3344                                 break;
3345                         case 8:
3346                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3347                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3348                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3349                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3350                                 break;
3351                         case 9:
3352                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3353                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3354                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3355                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3356                                 break;
3357                         case 10:
3358                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3359                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3360                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3361                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3362                                 break;
3363                         case 11:
3364                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3365                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3366                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3367                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3368                                 break;
3369                         case 12:
3370                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3371                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3372                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3373                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3374                                 break;
3375                         case 13:
3376                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3377                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3378                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3379                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3380                                 break;
3381                         case 14:
3382                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3383                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3384                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3385                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3386                                 break;
3387                         default:
3388                                 gb_tile_moden = 0;
3389                                 break;
3390                         }
3391                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3392                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3393                 }
3394         } else
3395                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3396 }
3397
3398 /**
3399  * cik_select_se_sh - select which SE, SH to address
3400  *
3401  * @rdev: radeon_device pointer
3402  * @se_num: shader engine to address
3403  * @sh_num: sh block to address
3404  *
3405  * Select which SE, SH combinations to address. Certain
3406  * registers are instanced per SE or SH.  0xffffffff means
3407  * broadcast to all SEs or SHs (CIK).
3408  */
3409 static void cik_select_se_sh(struct radeon_device *rdev,
3410                              u32 se_num, u32 sh_num)
3411 {
3412         u32 data = INSTANCE_BROADCAST_WRITES;
3413
3414         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3415                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3416         else if (se_num == 0xffffffff)
3417                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3418         else if (sh_num == 0xffffffff)
3419                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3420         else
3421                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3422         WREG32(GRBM_GFX_INDEX, data);
3423 }
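/*
 * Note: callers in this file (cik_setup_rb(), cik_gpu_init(), ...) take
 * rdev->grbm_idx_mutex around a select/program sequence and finish by
 * selecting broadcast again with cik_select_se_sh(rdev, 0xffffffff,
 * 0xffffffff), so that later GRBM-indexed writes reach all SEs/SHs.
 */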
3424
3425 /**
3426  * cik_create_bitmask - create a bitmask
3427  *
3428  * @bit_width: length of the mask
3429  *
3430  * create a variable length bit mask (CIK).
3431  * Returns the bitmask.
3432  */
3433 static u32 cik_create_bitmask(u32 bit_width)
3434 {
3435         u32 i, mask = 0;
3436
3437         for (i = 0; i < bit_width; i++) {
3438                 mask <<= 1;
3439                 mask |= 1;
3440         }
3441         return mask;
3442 }
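/*
 * For bit_width < 32 this is equivalent to ((1u << bit_width) - 1),
 * e.g. cik_create_bitmask(4) == 0xf; the loop form also handles
 * bit_width == 32 without relying on an undefined shift.
 */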
3443
3444 /**
3445  * cik_get_rb_disabled - computes the mask of disabled RBs
3446  *
3447  * @rdev: radeon_device pointer
3448  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3450  * @sh_per_se: number of SH blocks per SE for the asic
3451  *
3452  * Calculates the bitmask of disabled RBs (CIK).
3453  * Returns the disabled RB bitmask.
3454  */
3455 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3456                               u32 max_rb_num_per_se,
3457                               u32 sh_per_se)
3458 {
3459         u32 data, mask;
3460
3461         data = RREG32(CC_RB_BACKEND_DISABLE);
3462         if (data & 1)
3463                 data &= BACKEND_DISABLE_MASK;
3464         else
3465                 data = 0;
3466         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3467
3468         data >>= BACKEND_DISABLE_SHIFT;
3469
3470         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3471
3472         return data & mask;
3473 }
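/*
 * The result combines the RBs disabled in CC_RB_BACKEND_DISABLE (only
 * honoured here when its low bit is set) with those disabled via
 * GC_USER_RB_BACKEND_DISABLE, masked down to the number of RBs that exist
 * per SH (cik_create_bitmask(max_rb_num_per_se / sh_per_se)).
 */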
3474
3475 /**
3476  * cik_setup_rb - setup the RBs on the asic
3477  *
3478  * @rdev: radeon_device pointer
3479  * @se_num: number of SEs (shader engines) for the asic
3480  * @sh_per_se: number of SH blocks per SE for the asic
3481  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3482  *
3483  * Configures per-SE/SH RB registers (CIK).
3484  */
3485 static void cik_setup_rb(struct radeon_device *rdev,
3486                          u32 se_num, u32 sh_per_se,
3487                          u32 max_rb_num_per_se)
3488 {
3489         int i, j;
3490         u32 data, mask;
3491         u32 disabled_rbs = 0;
3492         u32 enabled_rbs = 0;
3493
3494         mutex_lock(&rdev->grbm_idx_mutex);
3495         for (i = 0; i < se_num; i++) {
3496                 for (j = 0; j < sh_per_se; j++) {
3497                         cik_select_se_sh(rdev, i, j);
3498                         data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3499                         if (rdev->family == CHIP_HAWAII)
3500                                 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3501                         else
3502                                 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3503                 }
3504         }
3505         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3506         mutex_unlock(&rdev->grbm_idx_mutex);
3507
3508         mask = 1;
3509         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3510                 if (!(disabled_rbs & mask))
3511                         enabled_rbs |= mask;
3512                 mask <<= 1;
3513         }
3514
3515         rdev->config.cik.backend_enable_mask = enabled_rbs;
3516
3517         mutex_lock(&rdev->grbm_idx_mutex);
3518         for (i = 0; i < se_num; i++) {
3519                 cik_select_se_sh(rdev, i, 0xffffffff);
3520                 data = 0;
3521                 for (j = 0; j < sh_per_se; j++) {
3522                         switch (enabled_rbs & 3) {
3523                         case 0:
3524                                 if (j == 0)
3525                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3526                                 else
3527                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3528                                 break;
3529                         case 1:
3530                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3531                                 break;
3532                         case 2:
3533                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3534                                 break;
3535                         case 3:
3536                         default:
3537                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3538                                 break;
3539                         }
3540                         enabled_rbs >>= 2;
3541                 }
3542                 WREG32(PA_SC_RASTER_CONFIG, data);
3543         }
3544         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3545         mutex_unlock(&rdev->grbm_idx_mutex);
3546 }
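/*
 * Two passes: the first records the per-SE/SH disable bits (packed at
 * CIK_RB_BITMAP_WIDTH_PER_SH or HAWAII_RB_BITMAP_WIDTH_PER_SH intervals),
 * the complement is cached in rdev->config.cik.backend_enable_mask, and the
 * second pass programs PA_SC_RASTER_CONFIG per SE, consuming two bits of
 * enabled_rbs for each SH.
 */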
3547
3548 /**
3549  * cik_gpu_init - setup the 3D engine
3550  *
3551  * @rdev: radeon_device pointer
3552  *
3553  * Configures the 3D engine and tiling configuration
3554  * registers so that the 3D engine is usable.
3555  */
3556 static void cik_gpu_init(struct radeon_device *rdev)
3557 {
3558         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3559         u32 mc_shared_chmap, mc_arb_ramcfg;
3560         u32 hdp_host_path_cntl;
3561         u32 tmp;
3562         int i, j;
3563
3564         switch (rdev->family) {
3565         case CHIP_BONAIRE:
3566                 rdev->config.cik.max_shader_engines = 2;
3567                 rdev->config.cik.max_tile_pipes = 4;
3568                 rdev->config.cik.max_cu_per_sh = 7;
3569                 rdev->config.cik.max_sh_per_se = 1;
3570                 rdev->config.cik.max_backends_per_se = 2;
3571                 rdev->config.cik.max_texture_channel_caches = 4;
3572                 rdev->config.cik.max_gprs = 256;
3573                 rdev->config.cik.max_gs_threads = 32;
3574                 rdev->config.cik.max_hw_contexts = 8;
3575
3576                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3577                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3578                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3579                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3580                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3581                 break;
3582         case CHIP_HAWAII:
3583                 rdev->config.cik.max_shader_engines = 4;
3584                 rdev->config.cik.max_tile_pipes = 16;
3585                 rdev->config.cik.max_cu_per_sh = 11;
3586                 rdev->config.cik.max_sh_per_se = 1;
3587                 rdev->config.cik.max_backends_per_se = 4;
3588                 rdev->config.cik.max_texture_channel_caches = 16;
3589                 rdev->config.cik.max_gprs = 256;
3590                 rdev->config.cik.max_gs_threads = 32;
3591                 rdev->config.cik.max_hw_contexts = 8;
3592
3593                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3594                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3595                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3596                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3597                 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3598                 break;
3599         case CHIP_KAVERI:
3600                 rdev->config.cik.max_shader_engines = 1;
3601                 rdev->config.cik.max_tile_pipes = 4;
3602                 if ((rdev->pdev->device == 0x1304) ||
3603                     (rdev->pdev->device == 0x1305) ||
3604                     (rdev->pdev->device == 0x130C) ||
3605                     (rdev->pdev->device == 0x130F) ||
3606                     (rdev->pdev->device == 0x1310) ||
3607                     (rdev->pdev->device == 0x1311) ||
3608                     (rdev->pdev->device == 0x131C)) {
3609                         rdev->config.cik.max_cu_per_sh = 8;
3610                         rdev->config.cik.max_backends_per_se = 2;
3611                 } else if ((rdev->pdev->device == 0x1309) ||
3612                            (rdev->pdev->device == 0x130A) ||
3613                            (rdev->pdev->device == 0x130D) ||
3614                            (rdev->pdev->device == 0x1313) ||
3615                            (rdev->pdev->device == 0x131D)) {
3616                         rdev->config.cik.max_cu_per_sh = 6;
3617                         rdev->config.cik.max_backends_per_se = 2;
3618                 } else if ((rdev->pdev->device == 0x1306) ||
3619                            (rdev->pdev->device == 0x1307) ||
3620                            (rdev->pdev->device == 0x130B) ||
3621                            (rdev->pdev->device == 0x130E) ||
3622                            (rdev->pdev->device == 0x1315) ||
3623                            (rdev->pdev->device == 0x1318) ||
3624                            (rdev->pdev->device == 0x131B)) {
3625                         rdev->config.cik.max_cu_per_sh = 4;
3626                         rdev->config.cik.max_backends_per_se = 1;
3627                 } else {
3628                         rdev->config.cik.max_cu_per_sh = 3;
3629                         rdev->config.cik.max_backends_per_se = 1;
3630                 }
3631                 rdev->config.cik.max_sh_per_se = 1;
3632                 rdev->config.cik.max_texture_channel_caches = 4;
3633                 rdev->config.cik.max_gprs = 256;
3634                 rdev->config.cik.max_gs_threads = 16;
3635                 rdev->config.cik.max_hw_contexts = 8;
3636
3637                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3638                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3639                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3640                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3641                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3642                 break;
3643         case CHIP_KABINI:
3644         case CHIP_MULLINS:
3645         default:
3646                 rdev->config.cik.max_shader_engines = 1;
3647                 rdev->config.cik.max_tile_pipes = 2;
3648                 rdev->config.cik.max_cu_per_sh = 2;
3649                 rdev->config.cik.max_sh_per_se = 1;
3650                 rdev->config.cik.max_backends_per_se = 1;
3651                 rdev->config.cik.max_texture_channel_caches = 2;
3652                 rdev->config.cik.max_gprs = 256;
3653                 rdev->config.cik.max_gs_threads = 16;
3654                 rdev->config.cik.max_hw_contexts = 8;
3655
3656                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3657                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3658                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3659                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3660                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3661                 break;
3662         }
3663
3664         /* Initialize HDP */
3665         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3666                 WREG32((0x2c14 + j), 0x00000000);
3667                 WREG32((0x2c18 + j), 0x00000000);
3668                 WREG32((0x2c1c + j), 0x00000000);
3669                 WREG32((0x2c20 + j), 0x00000000);
3670                 WREG32((0x2c24 + j), 0x00000000);
3671         }
3672
3673         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3674         WREG32(SRBM_INT_CNTL, 0x1);
3675         WREG32(SRBM_INT_ACK, 0x1);
3676
3677         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3678
3679         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3680         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3681
3682         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3683         rdev->config.cik.mem_max_burst_length_bytes = 256;
3684         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3685         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3686         if (rdev->config.cik.mem_row_size_in_kb > 4)
3687                 rdev->config.cik.mem_row_size_in_kb = 4;
3688         /* XXX use MC settings? */
3689         rdev->config.cik.shader_engine_tile_size = 32;
3690         rdev->config.cik.num_gpus = 1;
3691         rdev->config.cik.multi_gpu_tile_size = 64;
3692
3693         /* fix up row size */
3694         gb_addr_config &= ~ROW_SIZE_MASK;
3695         switch (rdev->config.cik.mem_row_size_in_kb) {
3696         case 1:
3697         default:
3698                 gb_addr_config |= ROW_SIZE(0);
3699                 break;
3700         case 2:
3701                 gb_addr_config |= ROW_SIZE(1);
3702                 break;
3703         case 4:
3704                 gb_addr_config |= ROW_SIZE(2);
3705                 break;
3706         }
3707
3708         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3709          * not have bank info, so create a custom tiling dword.
3710          * bits 3:0   num_pipes
3711          * bits 7:4   num_banks
3712          * bits 11:8  group_size
3713          * bits 15:12 row_size
3714          */
3715         rdev->config.cik.tile_config = 0;
3716         switch (rdev->config.cik.num_tile_pipes) {
3717         case 1:
3718                 rdev->config.cik.tile_config |= (0 << 0);
3719                 break;
3720         case 2:
3721                 rdev->config.cik.tile_config |= (1 << 0);
3722                 break;
3723         case 4:
3724                 rdev->config.cik.tile_config |= (2 << 0);
3725                 break;
3726         case 8:
3727         default:
3728                 /* XXX what about 12? */
3729                 rdev->config.cik.tile_config |= (3 << 0);
3730                 break;
3731         }
3732         rdev->config.cik.tile_config |=
3733                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3734         rdev->config.cik.tile_config |=
3735                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3736         rdev->config.cik.tile_config |=
3737                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
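        /*
         * Illustrative decode (actual values depend on the board's
         * GB_ADDR_CONFIG and MC_ARB_RAMCFG): a 4-pipe part stores 2 in
         * bits 3:0, with the bank count, pipe interleave size and row
         * size fields landing in bits 7:4, 11:8 and 15:12 respectively.
         */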
3738
3739         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3740         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3741         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3742         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3743         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3744         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3745         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3746         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3747
3748         cik_tiling_mode_table_init(rdev);
3749
3750         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3751                      rdev->config.cik.max_sh_per_se,
3752                      rdev->config.cik.max_backends_per_se);
3753
3754         rdev->config.cik.active_cus = 0;
3755         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3756                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3757                         rdev->config.cik.active_cus +=
3758                                 hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3759                 }
3760         }
3761
3762         /* set HW defaults for 3D engine */
3763         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3764
3765         mutex_lock(&rdev->grbm_idx_mutex);
3766         /*
3767          * making sure that the following register writes will be broadcast
3768          * to all the shaders
3769          */
3770         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3771         WREG32(SX_DEBUG_1, 0x20);
3772
3773         WREG32(TA_CNTL_AUX, 0x00010000);
3774
3775         tmp = RREG32(SPI_CONFIG_CNTL);
3776         tmp |= 0x03000000;
3777         WREG32(SPI_CONFIG_CNTL, tmp);
3778
3779         WREG32(SQ_CONFIG, 1);
3780
3781         WREG32(DB_DEBUG, 0);
3782
3783         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3784         tmp |= 0x00000400;
3785         WREG32(DB_DEBUG2, tmp);
3786
3787         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3788         tmp |= 0x00020200;
3789         WREG32(DB_DEBUG3, tmp);
3790
3791         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3792         tmp |= 0x00018208;
3793         WREG32(CB_HW_CONTROL, tmp);
3794
3795         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3796
3797         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3798                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3799                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3800                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3801
3802         WREG32(VGT_NUM_INSTANCES, 1);
3803
3804         WREG32(CP_PERFMON_CNTL, 0);
3805
3806         WREG32(SQ_CONFIG, 0);
3807
3808         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3809                                           FORCE_EOV_MAX_REZ_CNT(255)));
3810
3811         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3812                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3813
3814         WREG32(VGT_GS_VERTEX_REUSE, 16);
3815         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3816
3817         tmp = RREG32(HDP_MISC_CNTL);
3818         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3819         WREG32(HDP_MISC_CNTL, tmp);
3820
3821         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3822         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3823
3824         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3825         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3826         mutex_unlock(&rdev->grbm_idx_mutex);
3827
3828         udelay(50);
3829 }
3830
3831 /*
3832  * GPU scratch registers helpers function.
3833  */
3834 /**
3835  * cik_scratch_init - setup driver info for CP scratch regs
3836  *
3837  * @rdev: radeon_device pointer
3838  *
3839  * Set up the number and offset of the CP scratch registers.
3840  * NOTE: use of CP scratch registers is a legacy interface and
3841  * is not used by default on newer asics (r6xx+).  On newer asics,
3842  * memory buffers are used for fences rather than scratch regs.
3843  */
3844 static void cik_scratch_init(struct radeon_device *rdev)
3845 {
3846         int i;
3847
3848         rdev->scratch.num_reg = 7;
3849         rdev->scratch.reg_base = SCRATCH_REG0;
3850         for (i = 0; i < rdev->scratch.num_reg; i++) {
3851                 rdev->scratch.free[i] = true;
3852                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3853         }
3854 }
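/*
 * Scratch register i thus decodes to SCRATCH_REG0 + i * 4; the free[] flags
 * are what radeon_scratch_get()/radeon_scratch_free() (used by the ring and
 * IB tests below) consult when handing registers out.
 */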
3855
3856 /**
3857  * cik_ring_test - basic gfx ring test
3858  *
3859  * @rdev: radeon_device pointer
3860  * @ring: radeon_ring structure holding ring information
3861  *
3862  * Allocate a scratch register and write to it using the gfx ring (CIK).
3863  * Provides a basic gfx ring test to verify that the ring is working.
3864  * Used by cik_cp_gfx_resume().
3865  * Returns 0 on success, error on failure.
3866  */
3867 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3868 {
3869         uint32_t scratch;
3870         uint32_t tmp = 0;
3871         unsigned i;
3872         int r;
3873
3874         r = radeon_scratch_get(rdev, &scratch);
3875         if (r) {
3876                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3877                 return r;
3878         }
3879         WREG32(scratch, 0xCAFEDEAD);
3880         r = radeon_ring_lock(rdev, ring, 3);
3881         if (r) {
3882                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3883                 radeon_scratch_free(rdev, scratch);
3884                 return r;
3885         }
3886         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3887         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3888         radeon_ring_write(ring, 0xDEADBEEF);
3889         radeon_ring_unlock_commit(rdev, ring, false);
3890
3891         for (i = 0; i < rdev->usec_timeout; i++) {
3892                 tmp = RREG32(scratch);
3893                 if (tmp == 0xDEADBEEF)
3894                         break;
3895                 DRM_UDELAY(1);
3896         }
3897         if (i < rdev->usec_timeout) {
3898                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3899         } else {
3900                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3901                           ring->idx, scratch, tmp);
3902                 r = -EINVAL;
3903         }
3904         radeon_scratch_free(rdev, scratch);
3905         return r;
3906 }
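/*
 * Test pattern: seed the scratch register with 0xCAFEDEAD via MMIO, emit a
 * SET_UCONFIG_REG write of 0xDEADBEEF through the ring, then poll for up to
 * rdev->usec_timeout microseconds for the new value to appear.
 */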
3907
3908 /**
3909  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3910  *
3911  * @rdev: radeon_device pointer
3912  * @ridx: radeon ring index
3913  *
3914  * Emits an hdp flush on the cp.
3915  */
3916 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3917                                        int ridx)
3918 {
3919         struct radeon_ring *ring = &rdev->ring[ridx];
3920         u32 ref_and_mask;
3921
3922         switch (ring->idx) {
3923         case CAYMAN_RING_TYPE_CP1_INDEX:
3924         case CAYMAN_RING_TYPE_CP2_INDEX:
3925         default:
3926                 switch (ring->me) {
3927                 case 0:
3928                         ref_and_mask = CP2 << ring->pipe;
3929                         break;
3930                 case 1:
3931                         ref_and_mask = CP6 << ring->pipe;
3932                         break;
3933                 default:
3934                         return;
3935                 }
3936                 break;
3937         case RADEON_RING_TYPE_GFX_INDEX:
3938                 ref_and_mask = CP0;
3939                 break;
3940         }
3941
3942         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3943         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3944                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
3945                                  WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3946         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3947         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3948         radeon_ring_write(ring, ref_and_mask);
3949         radeon_ring_write(ring, ref_and_mask);
3950         radeon_ring_write(ring, 0x20); /* poll interval */
3951 }
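/*
 * The WAIT_REG_MEM packet implements the write/wait/write noted above: the CP
 * writes ref_and_mask to GPU_HDP_FLUSH_REQ, then polls GPU_HDP_FLUSH_DONE
 * (poll interval 0x20) until the masked value matches; the bit selects the
 * requesting client (CP0 for gfx, CP2/CP6 shifted by the pipe for compute).
 */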
3952
3953 /**
3954  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3955  *
3956  * @rdev: radeon_device pointer
3957  * @fence: radeon fence object
3958  *
3959  * Emits a fence sequence number on the gfx ring and flushes
3960  * GPU caches.
3961  */
3962 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3963                              struct radeon_fence *fence)
3964 {
3965         struct radeon_ring *ring = &rdev->ring[fence->ring];
3966         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3967
3968         /* Workaround for cache flush problems. First send a dummy EOP
3969          * event down the pipe with seq one below.
3970          */
3971         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3972         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3973                                  EOP_TC_ACTION_EN |
3974                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3975                                  EVENT_INDEX(5)));
3976         radeon_ring_write(ring, addr & 0xfffffffc);
3977         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3978                                 DATA_SEL(1) | INT_SEL(0));
3979         radeon_ring_write(ring, fence->seq - 1);
3980         radeon_ring_write(ring, 0);
3981
3982         /* Then send the real EOP event down the pipe. */
3983         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3984         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3985                                  EOP_TC_ACTION_EN |
3986                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3987                                  EVENT_INDEX(5)));
3988         radeon_ring_write(ring, addr & 0xfffffffc);
3989         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3990         radeon_ring_write(ring, fence->seq);
3991         radeon_ring_write(ring, 0);
3992 }
3993
3994 /**
3995  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3996  *
3997  * @rdev: radeon_device pointer
3998  * @fence: radeon fence object
3999  *
4000  * Emits a fence sequence number on the compute ring and flushes
4001  * GPU caches.
4002  */
4003 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
4004                                  struct radeon_fence *fence)
4005 {
4006         struct radeon_ring *ring = &rdev->ring[fence->ring];
4007         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
4008
4009         /* RELEASE_MEM - flush caches, send int */
4010         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
4011         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
4012                                  EOP_TC_ACTION_EN |
4013                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4014                                  EVENT_INDEX(5)));
4015         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
4016         radeon_ring_write(ring, addr & 0xfffffffc);
4017         radeon_ring_write(ring, upper_32_bits(addr));
4018         radeon_ring_write(ring, fence->seq);
4019         radeon_ring_write(ring, 0);
4020 }
4021
4022 /**
4023  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
4024  *
4025  * @rdev: radeon_device pointer
4026  * @ring: radeon ring buffer object
4027  * @semaphore: radeon semaphore object
4028  * @emit_wait: Is this a semaphore wait?
4029  *
4030  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
4031  * from running ahead of semaphore waits.
4032  */
4033 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
4034                              struct radeon_ring *ring,
4035                              struct radeon_semaphore *semaphore,
4036                              bool emit_wait)
4037 {
4038         uint64_t addr = semaphore->gpu_addr;
4039         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
4040
4041         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
4042         radeon_ring_write(ring, lower_32_bits(addr));
4043         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
4044
4045         if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
4046                 /* Prevent the PFP from running ahead of the semaphore wait */
4047                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4048                 radeon_ring_write(ring, 0x0);
4049         }
4050
4051         return true;
4052 }
4053
4054 /**
4055  * cik_copy_cpdma - copy pages using the CP DMA engine
4056  *
4057  * @rdev: radeon_device pointer
4058  * @src_offset: src GPU address
4059  * @dst_offset: dst GPU address
4060  * @num_gpu_pages: number of GPU pages to xfer
4061  * @resv: reservation object to sync to
4062  *
4063  * Copy GPU pages using the CP DMA engine (CIK+).
4064  * Used by the radeon ttm implementation to move pages if
4065  * registered as the asic copy callback.
4066  */
4067 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
4068                                     uint64_t src_offset, uint64_t dst_offset,
4069                                     unsigned num_gpu_pages,
4070                                     struct reservation_object *resv)
4071 {
4072         struct radeon_fence *fence;
4073         struct radeon_sync sync;
4074         int ring_index = rdev->asic->copy.blit_ring_index;
4075         struct radeon_ring *ring = &rdev->ring[ring_index];
4076         u32 size_in_bytes, cur_size_in_bytes, control;
4077         int i, num_loops;
4078         int r = 0;
4079
4080         radeon_sync_create(&sync);
4081
4082         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4083         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
4084         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
4085         if (r) {
4086                 DRM_ERROR("radeon: moving bo (%d).\n", r);
4087                 radeon_sync_free(rdev, &sync, NULL);
4088                 return ERR_PTR(r);
4089         }
4090
4091         radeon_sync_resv(rdev, &sync, resv, false);
4092         radeon_sync_rings(rdev, &sync, ring->idx);
4093
4094         for (i = 0; i < num_loops; i++) {
4095                 cur_size_in_bytes = size_in_bytes;
4096                 if (cur_size_in_bytes > 0x1fffff)
4097                         cur_size_in_bytes = 0x1fffff;
4098                 size_in_bytes -= cur_size_in_bytes;
4099                 control = 0;
4100                 if (size_in_bytes == 0)
4101                         control |= PACKET3_DMA_DATA_CP_SYNC;
4102                 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4103                 radeon_ring_write(ring, control);
4104                 radeon_ring_write(ring, lower_32_bits(src_offset));
4105                 radeon_ring_write(ring, upper_32_bits(src_offset));
4106                 radeon_ring_write(ring, lower_32_bits(dst_offset));
4107                 radeon_ring_write(ring, upper_32_bits(dst_offset));
4108                 radeon_ring_write(ring, cur_size_in_bytes);
4109                 src_offset += cur_size_in_bytes;
4110                 dst_offset += cur_size_in_bytes;
4111         }
4112
4113         r = radeon_fence_emit(rdev, &fence, ring->idx);
4114         if (r) {
4115                 radeon_ring_unlock_undo(rdev, ring);
4116                 radeon_sync_free(rdev, &sync, NULL);
4117                 return ERR_PTR(r);
4118         }
4119
4120         radeon_ring_unlock_commit(rdev, ring, false);
4121         radeon_sync_free(rdev, &sync, fence);
4122
4123         return fence;
4124 }
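/*
 * Each DMA_DATA packet moves at most 0x1fffff bytes, so the copy is split
 * into DIV_ROUND_UP(size, 0x1fffff) chunks of 7 dwords each, with 18 extra
 * dwords reserved for the sync and fence packets; only the final chunk sets
 * PACKET3_DMA_DATA_CP_SYNC.
 */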
4125
4126 /*
4127  * IB stuff
4128  */
4129 /**
4130  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4131  *
4132  * @rdev: radeon_device pointer
4133  * @ib: radeon indirect buffer object
4134  *
4135  * Emits a DE (drawing engine) or CE (constant engine) IB
4136  * on the gfx ring.  IBs are usually generated by userspace
4137  * acceleration drivers and submitted to the kernel for
4138  * scheduling on the ring.  This function schedules the IB
4139  * on the gfx ring for execution by the GPU.
4140  */
4141 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
4142 {
4143         struct radeon_ring *ring = &rdev->ring[ib->ring];
4144         unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
4145         u32 header, control = INDIRECT_BUFFER_VALID;
4146
4147         if (ib->is_const_ib) {
4148                 /* set switch buffer packet before const IB */
4149                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4150                 radeon_ring_write(ring, 0);
4151
4152                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4153         } else {
4154                 u32 next_rptr;
4155                 if (ring->rptr_save_reg) {
4156                         next_rptr = ring->wptr + 3 + 4;
4157                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4158                         radeon_ring_write(ring, ((ring->rptr_save_reg -
4159                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
4160                         radeon_ring_write(ring, next_rptr);
4161                 } else if (rdev->wb.enabled) {
4162                         next_rptr = ring->wptr + 5 + 4;
4163                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4164                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
4165                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4166                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
4167                         radeon_ring_write(ring, next_rptr);
4168                 }
4169
4170                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4171         }
4172
4173         control |= ib->length_dw | (vm_id << 24);
4174
4175         radeon_ring_write(ring, header);
4176         radeon_ring_write(ring,
4177 #ifdef __BIG_ENDIAN
4178                           (2 << 0) |
4179 #endif
4180                           (ib->gpu_addr & 0xFFFFFFFC));
4181         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4182         radeon_ring_write(ring, control);
4183 }
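/*
 * The IB packet itself is four dwords: the header (INDIRECT_BUFFER or
 * INDIRECT_BUFFER_CONST), the IB address split into a 4-byte-aligned low
 * word and the low 16 bits of the upper word, and a control word holding
 * length_dw with the VM id in bits 31:24. Const IBs are preceded by a
 * SWITCH_BUFFER packet.
 */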
4184
4185 /**
4186  * cik_ib_test - basic gfx ring IB test
4187  *
4188  * @rdev: radeon_device pointer
4189  * @ring: radeon_ring structure holding ring information
4190  *
4191  * Allocate an IB and execute it on the gfx ring (CIK).
4192  * Provides a basic gfx ring test to verify that IBs are working.
4193  * Returns 0 on success, error on failure.
4194  */
4195 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4196 {
4197         struct radeon_ib ib;
4198         uint32_t scratch;
4199         uint32_t tmp = 0;
4200         unsigned i;
4201         int r;
4202
4203         r = radeon_scratch_get(rdev, &scratch);
4204         if (r) {
4205                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
4206                 return r;
4207         }
4208         WREG32(scratch, 0xCAFEDEAD);
4209         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4210         if (r) {
4211                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4212                 radeon_scratch_free(rdev, scratch);
4213                 return r;
4214         }
4215         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
4216         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
4217         ib.ptr[2] = 0xDEADBEEF;
4218         ib.length_dw = 3;
4219         r = radeon_ib_schedule(rdev, &ib, NULL, false);
4220         if (r) {
4221                 radeon_scratch_free(rdev, scratch);
4222                 radeon_ib_free(rdev, &ib);
4223                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4224                 return r;
4225         }
4226         r = radeon_fence_wait(ib.fence, false);
4227         if (r) {
4228                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4229                 radeon_scratch_free(rdev, scratch);
4230                 radeon_ib_free(rdev, &ib);
4231                 return r;
4232         }
4233         for (i = 0; i < rdev->usec_timeout; i++) {
4234                 tmp = RREG32(scratch);
4235                 if (tmp == 0xDEADBEEF)
4236                         break;
4237                 DRM_UDELAY(1);
4238         }
4239         if (i < rdev->usec_timeout) {
4240                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4241         } else {
4242                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
4243                           scratch, tmp);
4244                 r = -EINVAL;
4245         }
4246         radeon_scratch_free(rdev, scratch);
4247         radeon_ib_free(rdev, &ib);
4248         return r;
4249 }
4250
4251 /*
4252  * CP.
4253  * On CIK, gfx and compute now have independent command processors.
4254  *
4255  * GFX
4256  * Gfx consists of a single ring and can process both gfx jobs and
4257  * compute jobs.  The gfx CP consists of three microengines (ME):
4258  * PFP - Pre-Fetch Parser
4259  * ME - Micro Engine
4260  * CE - Constant Engine
4261  * The PFP and ME make up what is considered the Drawing Engine (DE).
4262  * The CE is an asynchronous engine used for updating buffer descriptors
4263  * used by the DE so that they can be loaded into cache in parallel
4264  * while the DE is processing state update packets.
4265  *
4266  * Compute
4267  * The compute CP consists of two microengines (ME):
4268  * MEC1 - Compute MicroEngine 1
4269  * MEC2 - Compute MicroEngine 2
4270  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4271  * The queues are exposed to userspace and are programmed directly
4272  * by the compute runtime.
4273  */
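/*
 * In total that is 2 MECs * 4 pipes * 8 queues = 64 compute queues alongside
 * the single gfx ring; the kernel driver itself only drives the two CP1/CP2
 * compute rings configured below.
 */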
4274 /**
4275  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4276  *
4277  * @rdev: radeon_device pointer
4278  * @enable: enable or disable the MEs
4279  *
4280  * Halts or unhalts the gfx MEs.
4281  */
4282 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4283 {
4284         if (enable)
4285                 WREG32(CP_ME_CNTL, 0);
4286         else {
4287                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4288                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4289                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4290                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4291         }
4292         udelay(50);
4293 }
4294
4295 /**
4296  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4297  *
4298  * @rdev: radeon_device pointer
4299  *
4300  * Loads the gfx PFP, ME, and CE ucode.
4301  * Returns 0 for success, -EINVAL if the ucode is not available.
4302  */
4303 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4304 {
4305         int i;
4306
4307         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4308                 return -EINVAL;
4309
4310         cik_cp_gfx_enable(rdev, false);
4311
4312         if (rdev->new_fw) {
4313                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
4314                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4315                 const struct gfx_firmware_header_v1_0 *ce_hdr =
4316                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4317                 const struct gfx_firmware_header_v1_0 *me_hdr =
4318                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4319                 const __le32 *fw_data;
4320                 u32 fw_size;
4321
4322                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4323                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4324                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
4325
4326                 /* PFP */
4327                 fw_data = (const __le32 *)
4328                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4329                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4330                 WREG32(CP_PFP_UCODE_ADDR, 0);
4331                 for (i = 0; i < fw_size; i++)
4332                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4333                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4334
4335                 /* CE */
4336                 fw_data = (const __le32 *)
4337                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4338                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4339                 WREG32(CP_CE_UCODE_ADDR, 0);
4340                 for (i = 0; i < fw_size; i++)
4341                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4342                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4343
4344                 /* ME */
4345                 fw_data = (const __le32 *)
4346                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4347                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4348                 WREG32(CP_ME_RAM_WADDR, 0);
4349                 for (i = 0; i < fw_size; i++)
4350                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4351                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4352                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4353         } else {
4354                 const __be32 *fw_data;
4355
4356                 /* PFP */
4357                 fw_data = (const __be32 *)rdev->pfp_fw->data;
4358                 WREG32(CP_PFP_UCODE_ADDR, 0);
4359                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4360                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4361                 WREG32(CP_PFP_UCODE_ADDR, 0);
4362
4363                 /* CE */
4364                 fw_data = (const __be32 *)rdev->ce_fw->data;
4365                 WREG32(CP_CE_UCODE_ADDR, 0);
4366                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4367                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4368                 WREG32(CP_CE_UCODE_ADDR, 0);
4369
4370                 /* ME */
4371                 fw_data = (const __be32 *)rdev->me_fw->data;
4372                 WREG32(CP_ME_RAM_WADDR, 0);
4373                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4374                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4375                 WREG32(CP_ME_RAM_WADDR, 0);
4376         }
4377
4378         return 0;
4379 }
4380
4381 /**
4382  * cik_cp_gfx_start - start the gfx ring
4383  *
4384  * @rdev: radeon_device pointer
4385  *
4386  * Enables the ring and loads the clear state context and other
4387  * packets required to init the ring.
4388  * Returns 0 for success, error for failure.
4389  */
4390 static int cik_cp_gfx_start(struct radeon_device *rdev)
4391 {
4392         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4393         int r, i;
4394
4395         /* init the CP */
4396         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4397         WREG32(CP_ENDIAN_SWAP, 0);
4398         WREG32(CP_DEVICE_ID, 1);
4399
4400         cik_cp_gfx_enable(rdev, true);
4401
4402         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4403         if (r) {
4404                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4405                 return r;
4406         }
4407
4408         /* init the CE partitions.  CE only used for gfx on CIK */
4409         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4410         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4411         radeon_ring_write(ring, 0x8000);
4412         radeon_ring_write(ring, 0x8000);
4413
4414         /* setup clear context state */
4415         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4416         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4417
4418         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4419         radeon_ring_write(ring, 0x80000000);
4420         radeon_ring_write(ring, 0x80000000);
4421
4422         for (i = 0; i < cik_default_size; i++)
4423                 radeon_ring_write(ring, cik_default_state[i]);
4424
4425         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4426         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4427
4428         /* set clear context state */
4429         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4430         radeon_ring_write(ring, 0);
4431
4432         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4433         radeon_ring_write(ring, 0x00000316);
4434         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4435         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4436
4437         radeon_ring_unlock_commit(rdev, ring, false);
4438
4439         return 0;
4440 }
4441
4442 /**
4443  * cik_cp_gfx_fini - stop the gfx ring
4444  *
4445  * @rdev: radeon_device pointer
4446  *
4447  * Stop the gfx ring and tear down the driver ring
4448  * info.
4449  */
4450 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4451 {
4452         cik_cp_gfx_enable(rdev, false);
4453         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4454 }
4455
4456 /**
4457  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4458  *
4459  * @rdev: radeon_device pointer
4460  *
4461  * Program the location and size of the gfx ring buffer
4462  * and test it to make sure it's working.
4463  * Returns 0 for success, error for failure.
4464  */
4465 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4466 {
4467         struct radeon_ring *ring;
4468         u32 tmp;
4469         u32 rb_bufsz;
4470         u64 rb_addr;
4471         int r;
4472
4473         WREG32(CP_SEM_WAIT_TIMER, 0x0);
4474         if (rdev->family != CHIP_HAWAII)
4475                 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4476
4477         /* Set the write pointer delay */
4478         WREG32(CP_RB_WPTR_DELAY, 0);
4479
4480         /* set the RB to use vmid 0 */
4481         WREG32(CP_RB_VMID, 0);
4482
4483         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4484
4485         /* ring 0 - compute and gfx */
4486         /* Set ring buffer size */
4487         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4488         rb_bufsz = order_base_2(ring->ring_size / 8);
4489         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4490 #ifdef __BIG_ENDIAN
4491         tmp |= BUF_SWAP_32BIT;
4492 #endif
4493         WREG32(CP_RB0_CNTL, tmp);
4494
4495         /* Initialize the ring buffer's read and write pointers */
4496         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4497         ring->wptr = 0;
4498         WREG32(CP_RB0_WPTR, ring->wptr);
4499
4500         /* set the wb address whether it's enabled or not */
4501         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4502         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4503
4504         /* scratch register shadowing is no longer supported */
4505         WREG32(SCRATCH_UMSK, 0);
4506
4507         if (!rdev->wb.enabled)
4508                 tmp |= RB_NO_UPDATE;
4509
4510         mdelay(1);
4511         WREG32(CP_RB0_CNTL, tmp);
4512
4513         rb_addr = ring->gpu_addr >> 8;
4514         WREG32(CP_RB0_BASE, rb_addr);
4515         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4516
4517         /* start the ring */
4518         cik_cp_gfx_start(rdev);
4519         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4520         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4521         if (r) {
4522                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4523                 return r;
4524         }
4525
4526         if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4527                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4528
4529         return 0;
4530 }
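/*
 * CP_RB0_CNTL takes the ring size as a log2 of the size in qwords
 * (order_base_2(ring_size / 8)), with a second log2-sized field shifted to
 * bit 8; when writeback is disabled, RB_NO_UPDATE is set and
 * cik_gfx_get_rptr() below falls back to reading CP_RB0_RPTR directly.
 */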
4531
4532 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4533                      struct radeon_ring *ring)
4534 {
4535         u32 rptr;
4536
4537         if (rdev->wb.enabled)
4538                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4539         else
4540                 rptr = RREG32(CP_RB0_RPTR);
4541
4542         return rptr;
4543 }
4544
4545 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4546                      struct radeon_ring *ring)
4547 {
4548         u32 wptr;
4549
4550         wptr = RREG32(CP_RB0_WPTR);
4551
4552         return wptr;
4553 }
4554
4555 void cik_gfx_set_wptr(struct radeon_device *rdev,
4556                       struct radeon_ring *ring)
4557 {
4558         WREG32(CP_RB0_WPTR, ring->wptr);
4559         (void)RREG32(CP_RB0_WPTR);
4560 }
4561
4562 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4563                          struct radeon_ring *ring)
4564 {
4565         u32 rptr;
4566
4567         if (rdev->wb.enabled) {
4568                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4569         } else {
4570                 mutex_lock(&rdev->srbm_mutex);
4571                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4572                 rptr = RREG32(CP_HQD_PQ_RPTR);
4573                 cik_srbm_select(rdev, 0, 0, 0, 0);
4574                 mutex_unlock(&rdev->srbm_mutex);
4575         }
4576
4577         return rptr;
4578 }
4579
4580 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4581                          struct radeon_ring *ring)
4582 {
4583         u32 wptr;
4584
4585         if (rdev->wb.enabled) {
4586                 /* XXX check if swapping is necessary on BE */
4587                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4588         } else {
4589                 mutex_lock(&rdev->srbm_mutex);
4590                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4591                 wptr = RREG32(CP_HQD_PQ_WPTR);
4592                 cik_srbm_select(rdev, 0, 0, 0, 0);
4593                 mutex_unlock(&rdev->srbm_mutex);
4594         }
4595
4596         return wptr;
4597 }
4598
4599 void cik_compute_set_wptr(struct radeon_device *rdev,
4600                           struct radeon_ring *ring)
4601 {
4602         /* XXX check if swapping is necessary on BE */
4603         rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4604         WDOORBELL32(ring->doorbell_index, ring->wptr);
4605 }
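/*
 * Compute write pointers are published through the writeback buffer plus a
 * doorbell (WDOORBELL32) rather than a plain MMIO ring register, matching
 * how cik_compute_get_wptr() reads them back when writeback is enabled.
 */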
4606
4607 static void cik_compute_stop(struct radeon_device *rdev,
4608                              struct radeon_ring *ring)
4609 {
4610         u32 j, tmp;
4611
4612         cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4613         /* Disable wptr polling. */
4614         tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4615         tmp &= ~WPTR_POLL_EN;
4616         WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4617         /* Disable HQD. */
4618         if (RREG32(CP_HQD_ACTIVE) & 1) {
4619                 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4620                 for (j = 0; j < rdev->usec_timeout; j++) {
4621                         if (!(RREG32(CP_HQD_ACTIVE) & 1))
4622                                 break;
4623                         udelay(1);
4624                 }
4625                 WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4626                 WREG32(CP_HQD_PQ_RPTR, 0);
4627                 WREG32(CP_HQD_PQ_WPTR, 0);
4628         }
4629         cik_srbm_select(rdev, 0, 0, 0, 0);
4630 }
4631
4632 /**
4633  * cik_cp_compute_enable - enable/disable the compute CP MEs
4634  *
4635  * @rdev: radeon_device pointer
4636  * @enable: enable or disable the MEs
4637  *
4638  * Halts or unhalts the compute MEs.
4639  */
4640 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4641 {
4642         if (enable)
4643                 WREG32(CP_MEC_CNTL, 0);
4644         else {
4645                 /*
4646                  * To make hibernation reliable we need to clear compute ring
4647                  * configuration before halting the compute ring.
4648                  */
4649                 mutex_lock(&rdev->srbm_mutex);
4650                 cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4651                 cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4652                 mutex_unlock(&rdev->srbm_mutex);
4653
4654                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4655                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4656                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4657         }
4658         udelay(50);
4659 }
4660
4661 /**
4662  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4663  *
4664  * @rdev: radeon_device pointer
4665  *
4666  * Loads the compute MEC1&2 ucode.
4667  * Returns 0 for success, -EINVAL if the ucode is not available.
4668  */
4669 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4670 {
4671         int i;
4672
4673         if (!rdev->mec_fw)
4674                 return -EINVAL;
4675
4676         cik_cp_compute_enable(rdev, false);
4677
4678         if (rdev->new_fw) {
4679                 const struct gfx_firmware_header_v1_0 *mec_hdr =
4680                         (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4681                 const __le32 *fw_data;
4682                 u32 fw_size;
4683
4684                 radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4685
4686                 /* MEC1 */
4687                 fw_data = (const __le32 *)
4688                         (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4689                 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4690                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4691                 for (i = 0; i < fw_size; i++)
4692                         WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4693                 WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4694
4695                 /* MEC2 */
4696                 if (rdev->family == CHIP_KAVERI) {
4697                         const struct gfx_firmware_header_v1_0 *mec2_hdr =
4698                                 (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4699
4700                         fw_data = (const __le32 *)
4701                                 (rdev->mec2_fw->data +
4702                                  le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4703                         fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4704                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4705                         for (i = 0; i < fw_size; i++)
4706                                 WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4707                         WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4708                 }
4709         } else {
4710                 const __be32 *fw_data;
4711
4712                 /* MEC1 */
4713                 fw_data = (const __be32 *)rdev->mec_fw->data;
4714                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4715                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4716                         WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4717                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4718
4719                 if (rdev->family == CHIP_KAVERI) {
4720                         /* MEC2 */
4721                         fw_data = (const __be32 *)rdev->mec_fw->data;
4722                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4723                         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4724                                 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4725                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4726                 }
4727         }
4728
4729         return 0;
4730 }
4731
4732 /**
4733  * cik_cp_compute_start - start the compute queues
4734  *
4735  * @rdev: radeon_device pointer
4736  *
4737  * Enable the compute queues.
4738  * Returns 0 for success, error for failure.
4739  */
4740 static int cik_cp_compute_start(struct radeon_device *rdev)
4741 {
4742         cik_cp_compute_enable(rdev, true);
4743
4744         return 0;
4745 }
4746
4747 /**
4748  * cik_cp_compute_fini - stop the compute queues
4749  *
4750  * @rdev: radeon_device pointer
4751  *
4752  * Stop the compute queues and tear down the driver queue
4753  * info.
4754  */
4755 static void cik_cp_compute_fini(struct radeon_device *rdev)
4756 {
4757         int i, idx, r;
4758
4759         cik_cp_compute_enable(rdev, false);
4760
4761         for (i = 0; i < 2; i++) {
4762                 if (i == 0)
4763                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4764                 else
4765                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4766
4767                 if (rdev->ring[idx].mqd_obj) {
4768                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4769                         if (unlikely(r != 0))
4770                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4771
4772                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4773                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4774
4775                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4776                         rdev->ring[idx].mqd_obj = NULL;
4777                 }
4778         }
4779 }
4780
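/**
 * cik_mec_fini - tear down the MEC HPD EOP buffer
 *
 * @rdev: radeon_device pointer
 *
 * Unpins and frees the buffer object backing the compute MEC
 * HPD EOP area, if it was allocated.
 */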
4781 static void cik_mec_fini(struct radeon_device *rdev)
4782 {
4783         int r;
4784
4785         if (rdev->mec.hpd_eop_obj) {
4786                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4787                 if (unlikely(r != 0))
4788                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4789                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4790                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4791
4792                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4793                 rdev->mec.hpd_eop_obj = NULL;
4794         }
4795 }
4796
4797 #define MEC_HPD_SIZE 2048
4798
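/**
 * cik_mec_init - allocate the MEC HPD EOP buffer
 *
 * @rdev: radeon_device pointer
 *
 * Allocates, pins, and clears the GTT buffer object backing the
 * HPD EOP area for the compute MEC pipe used by the driver.
 * Returns 0 for success, error for failure.
 */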
4799 static int cik_mec_init(struct radeon_device *rdev)
4800 {
4801         int r;
4802         u32 *hpd;
4803
4804         /*
4805          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4806          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4807          * Nonetheless, we assign only 1 pipe because all other pipes will
4808          * be handled by KFD
4809          */
4810         rdev->mec.num_mec = 1;
4811         rdev->mec.num_pipe = 1;
4812         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4813
4814         if (rdev->mec.hpd_eop_obj == NULL) {
4815                 r = radeon_bo_create(rdev,
4816                                      rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4817                                      PAGE_SIZE, true,
4818                                      RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4819                                      &rdev->mec.hpd_eop_obj);
4820                 if (r) {
4821                         dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4822                         return r;
4823                 }
4824         }
4825
4826         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4827         if (unlikely(r != 0)) {
4828                 cik_mec_fini(rdev);
4829                 return r;
4830         }
4831         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4832                           &rdev->mec.hpd_eop_gpu_addr);
4833         if (r) {
4834                 dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4835                 cik_mec_fini(rdev);
4836                 return r;
4837         }
4838         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4839         if (r) {
4840                 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4841                 cik_mec_fini(rdev);
4842                 return r;
4843         }
4844
4845         /* clear memory.  Not sure if this is required or not */
4846         memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4847
4848         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4849         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4850
4851         return 0;
4852 }
4853
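/* Snapshot of the per-queue HQD register state stored inside the MQD. */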
4854 struct hqd_registers
4855 {
4856         u32 cp_mqd_base_addr;
4857         u32 cp_mqd_base_addr_hi;
4858         u32 cp_hqd_active;
4859         u32 cp_hqd_vmid;
4860         u32 cp_hqd_persistent_state;
4861         u32 cp_hqd_pipe_priority;
4862         u32 cp_hqd_queue_priority;
4863         u32 cp_hqd_quantum;
4864         u32 cp_hqd_pq_base;
4865         u32 cp_hqd_pq_base_hi;
4866         u32 cp_hqd_pq_rptr;
4867         u32 cp_hqd_pq_rptr_report_addr;
4868         u32 cp_hqd_pq_rptr_report_addr_hi;
4869         u32 cp_hqd_pq_wptr_poll_addr;
4870         u32 cp_hqd_pq_wptr_poll_addr_hi;
4871         u32 cp_hqd_pq_doorbell_control;
4872         u32 cp_hqd_pq_wptr;
4873         u32 cp_hqd_pq_control;
4874         u32 cp_hqd_ib_base_addr;
4875         u32 cp_hqd_ib_base_addr_hi;
4876         u32 cp_hqd_ib_rptr;
4877         u32 cp_hqd_ib_control;
4878         u32 cp_hqd_iq_timer;
4879         u32 cp_hqd_iq_rptr;
4880         u32 cp_hqd_dequeue_request;
4881         u32 cp_hqd_dma_offload;
4882         u32 cp_hqd_sema_cmd;
4883         u32 cp_hqd_msg_type;
4884         u32 cp_hqd_atomic0_preop_lo;
4885         u32 cp_hqd_atomic0_preop_hi;
4886         u32 cp_hqd_atomic1_preop_lo;
4887         u32 cp_hqd_atomic1_preop_hi;
4888         u32 cp_hqd_hq_scheduler0;
4889         u32 cp_hqd_hq_scheduler1;
4890         u32 cp_mqd_control;
4891 };
4892
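/* Memory queue descriptor (MQD) used to initialize a compute HQD. */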
4893 struct bonaire_mqd
4894 {
4895         u32 header;
4896         u32 dispatch_initiator;
4897         u32 dimensions[3];
4898         u32 start_idx[3];
4899         u32 num_threads[3];
4900         u32 pipeline_stat_enable;
4901         u32 perf_counter_enable;
4902         u32 pgm[2];
4903         u32 tba[2];
4904         u32 tma[2];
4905         u32 pgm_rsrc[2];
4906         u32 vmid;
4907         u32 resource_limits;
4908         u32 static_thread_mgmt01[2];
4909         u32 tmp_ring_size;
4910         u32 static_thread_mgmt23[2];
4911         u32 restart[3];
4912         u32 thread_trace_enable;
4913         u32 reserved1;
4914         u32 user_data[16];
4915         u32 vgtcs_invoke_count[2];
4916         struct hqd_registers queue_state;
4917         u32 dequeue_cntr;
4918         u32 interrupt_queue[64];
4919 };
4920
4921 /**
4922  * cik_cp_compute_resume - setup the compute queue registers
4923  *
4924  * @rdev: radeon_device pointer
4925  *
4926  * Program the compute queues and test them to make sure they
4927  * are working.
4928  * Returns 0 for success, error for failure.
4929  */
4930 static int cik_cp_compute_resume(struct radeon_device *rdev)
4931 {
4932         int r, i, j, idx;
4933         u32 tmp;
4934         bool use_doorbell = true;
4935         u64 hqd_gpu_addr;
4936         u64 mqd_gpu_addr;
4937         u64 eop_gpu_addr;
4938         u64 wb_gpu_addr;
4939         u32 *buf;
4940         struct bonaire_mqd *mqd;
4941
4942         r = cik_cp_compute_start(rdev);
4943         if (r)
4944                 return r;
4945
4946         /* fix up chicken bits */
4947         tmp = RREG32(CP_CPF_DEBUG);
4948         tmp |= (1 << 23);
4949         WREG32(CP_CPF_DEBUG, tmp);
4950
4951         /* init the pipes */
4952         mutex_lock(&rdev->srbm_mutex);
4953
4954         eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4955
4956         cik_srbm_select(rdev, 0, 0, 0, 0);
4957
4958         /* write the EOP addr */
4959         WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4960         WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4961
4962         /* set the VMID assigned */
4963         WREG32(CP_HPD_EOP_VMID, 0);
4964
4965         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4966         tmp = RREG32(CP_HPD_EOP_CONTROL);
4967         tmp &= ~EOP_SIZE_MASK;
4968         tmp |= order_base_2(MEC_HPD_SIZE / 8);
4969         WREG32(CP_HPD_EOP_CONTROL, tmp);
4970
4971         mutex_unlock(&rdev->srbm_mutex);
4972
4973         /* init the queues.  Just two for now. */
4974         for (i = 0; i < 2; i++) {
4975                 if (i == 0)
4976                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4977                 else
4978                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4979
4980                 if (rdev->ring[idx].mqd_obj == NULL) {
4981                         r = radeon_bo_create(rdev,
4982                                              sizeof(struct bonaire_mqd),
4983                                              PAGE_SIZE, true,
4984                                              RADEON_GEM_DOMAIN_GTT, 0, NULL,
4985                                              NULL, &rdev->ring[idx].mqd_obj);
4986                         if (r) {
4987                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4988                                 return r;
4989                         }
4990                 }
4991
4992                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4993                 if (unlikely(r != 0)) {
4994                         cik_cp_compute_fini(rdev);
4995                         return r;
4996                 }
4997                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4998                                   &mqd_gpu_addr);
4999                 if (r) {
5000                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
5001                         cik_cp_compute_fini(rdev);
5002                         return r;
5003                 }
5004                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
5005                 if (r) {
5006                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
5007                         cik_cp_compute_fini(rdev);
5008                         return r;
5009                 }
5010
5011                 /* init the mqd struct */
5012                 memset(buf, 0, sizeof(struct bonaire_mqd));
5013
5014                 mqd = (struct bonaire_mqd *)buf;
5015                 mqd->header = 0xC0310800;
5016                 mqd->static_thread_mgmt01[0] = 0xffffffff;
5017                 mqd->static_thread_mgmt01[1] = 0xffffffff;
5018                 mqd->static_thread_mgmt23[0] = 0xffffffff;
5019                 mqd->static_thread_mgmt23[1] = 0xffffffff;
5020
5021                 mutex_lock(&rdev->srbm_mutex);
5022                 cik_srbm_select(rdev, rdev->ring[idx].me,
5023                                 rdev->ring[idx].pipe,
5024                                 rdev->ring[idx].queue, 0);
5025
5026                 /* disable wptr polling */
5027                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
5028                 tmp &= ~WPTR_POLL_EN;
5029                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
5030
5031                 /* enable doorbell? */
5032                 mqd->queue_state.cp_hqd_pq_doorbell_control =
5033                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5034                 if (use_doorbell)
5035                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5036                 else
5037                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
5038                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5039                        mqd->queue_state.cp_hqd_pq_doorbell_control);
5040
5041                 /* disable the queue if it's active */
5042                 mqd->queue_state.cp_hqd_dequeue_request = 0;
5043                 mqd->queue_state.cp_hqd_pq_rptr = 0;
5044                 mqd->queue_state.cp_hqd_pq_wptr = 0;
5045                 if (RREG32(CP_HQD_ACTIVE) & 1) {
5046                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
5047                         for (j = 0; j < rdev->usec_timeout; j++) {
5048                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
5049                                         break;
5050                                 udelay(1);
5051                         }
5052                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
5053                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
5054                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5055                 }
5056
5057                 /* set the pointer to the MQD */
5058                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
5059                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
5060                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
5061                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
5062                 /* set MQD vmid to 0 */
5063                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
5064                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
5065                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
5066
5067                 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
5068                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
5069                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
5070                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
5071                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
5072                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
5073
5074                 /* set up the HQD, this is similar to CP_RB0_CNTL */
5075                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
5076                 mqd->queue_state.cp_hqd_pq_control &=
5077                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
5078
5079                 mqd->queue_state.cp_hqd_pq_control |=
5080                         order_base_2(rdev->ring[idx].ring_size / 8);
5081                 mqd->queue_state.cp_hqd_pq_control |=
5082                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
5083 #ifdef __BIG_ENDIAN
5084                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
5085 #endif
5086                 mqd->queue_state.cp_hqd_pq_control &=
5087                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
5088                 mqd->queue_state.cp_hqd_pq_control |=
5089                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
5090                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
5091
5092                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
5093                 if (i == 0)
5094                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
5095                 else
5096                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
5097                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
5098                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
5099                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
5100                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
5101                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
5102
5103                 /* set the wb address whether it's enabled or not */
5104                 if (i == 0)
5105                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
5106                 else
5107                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
5108                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
5109                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
5110                         upper_32_bits(wb_gpu_addr) & 0xffff;
5111                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
5112                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
5113                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5114                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
5115
5116                 /* enable the doorbell if requested */
5117                 if (use_doorbell) {
5118                         mqd->queue_state.cp_hqd_pq_doorbell_control =
5119                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5120                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
5121                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
5122                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
5123                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5124                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
5125                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
5126
5127                 } else {
5128                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
5129                 }
5130                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5131                        mqd->queue_state.cp_hqd_pq_doorbell_control);
5132
5133                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5134                 rdev->ring[idx].wptr = 0;
5135                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
5136                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5137                 mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
5138
5139                 /* set the vmid for the queue */
5140                 mqd->queue_state.cp_hqd_vmid = 0;
5141                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
5142
5143                 /* activate the queue */
5144                 mqd->queue_state.cp_hqd_active = 1;
5145                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
5146
5147                 cik_srbm_select(rdev, 0, 0, 0, 0);
5148                 mutex_unlock(&rdev->srbm_mutex);
5149
5150                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
5151                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
5152
5153                 rdev->ring[idx].ready = true;
5154                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
5155                 if (r)
5156                         rdev->ring[idx].ready = false;
5157         }
5158
5159         return 0;
5160 }
5161
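/* enable/disable both the gfx and compute command processors */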
5162 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
5163 {
5164         cik_cp_gfx_enable(rdev, enable);
5165         cik_cp_compute_enable(rdev, enable);
5166 }
5167
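/* load the gfx and compute CP microcode */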
5168 static int cik_cp_load_microcode(struct radeon_device *rdev)
5169 {
5170         int r;
5171
5172         r = cik_cp_gfx_load_microcode(rdev);
5173         if (r)
5174                 return r;
5175         r = cik_cp_compute_load_microcode(rdev);
5176         if (r)
5177                 return r;
5178
5179         return 0;
5180 }
5181
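/* tear down the gfx and compute rings */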
5182 static void cik_cp_fini(struct radeon_device *rdev)
5183 {
5184         cik_cp_gfx_fini(rdev);
5185         cik_cp_compute_fini(rdev);
5186 }
5187
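/* load the CP microcode and bring up the gfx and compute rings */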
5188 static int cik_cp_resume(struct radeon_device *rdev)
5189 {
5190         int r;
5191
5192         cik_enable_gui_idle_interrupt(rdev, false);
5193
5194         r = cik_cp_load_microcode(rdev);
5195         if (r)
5196                 return r;
5197
5198         r = cik_cp_gfx_resume(rdev);
5199         if (r)
5200                 return r;
5201         r = cik_cp_compute_resume(rdev);
5202         if (r)
5203                 return r;
5204
5205         cik_enable_gui_idle_interrupt(rdev, true);
5206
5207         return 0;
5208 }
5209
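/* dump the GRBM/SRBM/SDMA/CP status registers for debugging */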
5210 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
5211 {
5212         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
5213                 RREG32(GRBM_STATUS));
5214         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
5215                 RREG32(GRBM_STATUS2));
5216         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
5217                 RREG32(GRBM_STATUS_SE0));
5218         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
5219                 RREG32(GRBM_STATUS_SE1));
5220         dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
5221                 RREG32(GRBM_STATUS_SE2));
5222         dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
5223                 RREG32(GRBM_STATUS_SE3));
5224         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
5225                 RREG32(SRBM_STATUS));
5226         dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
5227                 RREG32(SRBM_STATUS2));
5228         dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
5229                 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
5230         dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
5231                  RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
5232         dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
5233         dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
5234                  RREG32(CP_STALLED_STAT1));
5235         dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
5236                  RREG32(CP_STALLED_STAT2));
5237         dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
5238                  RREG32(CP_STALLED_STAT3));
5239         dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
5240                  RREG32(CP_CPF_BUSY_STAT));
5241         dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
5242                  RREG32(CP_CPF_STALLED_STAT1));
5243         dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
5244         dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
5245         dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
5246                  RREG32(CP_CPC_STALLED_STAT1));
5247         dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
5248 }
5249
5250 /**
5251  * cik_gpu_check_soft_reset - check which blocks are busy
5252  *
5253  * @rdev: radeon_device pointer
5254  *
5255  * Check which blocks are busy and return the relevant reset
5256  * mask to be used by cik_gpu_soft_reset().
5257  * Returns a mask of the blocks to be reset.
5258  */
5259 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5260 {
5261         u32 reset_mask = 0;
5262         u32 tmp;
5263
5264         /* GRBM_STATUS */
5265         tmp = RREG32(GRBM_STATUS);
5266         if (tmp & (PA_BUSY | SC_BUSY |
5267                    BCI_BUSY | SX_BUSY |
5268                    TA_BUSY | VGT_BUSY |
5269                    DB_BUSY | CB_BUSY |
5270                    GDS_BUSY | SPI_BUSY |
5271                    IA_BUSY | IA_BUSY_NO_DMA))
5272                 reset_mask |= RADEON_RESET_GFX;
5273
5274         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5275                 reset_mask |= RADEON_RESET_CP;
5276
5277         /* GRBM_STATUS2 */
5278         tmp = RREG32(GRBM_STATUS2);
5279         if (tmp & RLC_BUSY)
5280                 reset_mask |= RADEON_RESET_RLC;
5281
5282         /* SDMA0_STATUS_REG */
5283         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5284         if (!(tmp & SDMA_IDLE))
5285                 reset_mask |= RADEON_RESET_DMA;
5286
5287         /* SDMA1_STATUS_REG */
5288         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5289         if (!(tmp & SDMA_IDLE))
5290                 reset_mask |= RADEON_RESET_DMA1;
5291
5292         /* SRBM_STATUS2 */
5293         tmp = RREG32(SRBM_STATUS2);
5294         if (tmp & SDMA_BUSY)
5295                 reset_mask |= RADEON_RESET_DMA;
5296
5297         if (tmp & SDMA1_BUSY)
5298                 reset_mask |= RADEON_RESET_DMA1;
5299
5300         /* SRBM_STATUS */
5301         tmp = RREG32(SRBM_STATUS);
5302
5303         if (tmp & IH_BUSY)
5304                 reset_mask |= RADEON_RESET_IH;
5305
5306         if (tmp & SEM_BUSY)
5307                 reset_mask |= RADEON_RESET_SEM;
5308
5309         if (tmp & GRBM_RQ_PENDING)
5310                 reset_mask |= RADEON_RESET_GRBM;
5311
5312         if (tmp & VMC_BUSY)
5313                 reset_mask |= RADEON_RESET_VMC;
5314
5315         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5316                    MCC_BUSY | MCD_BUSY))
5317                 reset_mask |= RADEON_RESET_MC;
5318
5319         if (evergreen_is_display_hung(rdev))
5320                 reset_mask |= RADEON_RESET_DISPLAY;
5321
5322         /* Skip MC reset as it's most likely not hung, just busy */
5323         if (reset_mask & RADEON_RESET_MC) {
5324                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5325                 reset_mask &= ~RADEON_RESET_MC;
5326         }
5327
5328         return reset_mask;
5329 }
5330
5331 /**
5332  * cik_gpu_soft_reset - soft reset GPU
5333  *
5334  * @rdev: radeon_device pointer
5335  * @reset_mask: mask of which blocks to reset
5336  *
5337  * Soft reset the blocks specified in @reset_mask.
5338  */
5339 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
5340 {
5341         struct evergreen_mc_save save;
5342         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5343         u32 tmp;
5344
5345         if (reset_mask == 0)
5346                 return;
5347
5348         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
5349
5350         cik_print_gpu_status_regs(rdev);
5351         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5352                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5353         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5354                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5355
5356         /* disable CG/PG */
5357         cik_fini_pg(rdev);
5358         cik_fini_cg(rdev);
5359
5360         /* stop the rlc */
5361         cik_rlc_stop(rdev);
5362
5363         /* Disable GFX parsing/prefetching */
5364         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5365
5366         /* Disable MEC parsing/prefetching */
5367         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5368
5369         if (reset_mask & RADEON_RESET_DMA) {
5370                 /* sdma0 */
5371                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5372                 tmp |= SDMA_HALT;
5373                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5374         }
5375         if (reset_mask & RADEON_RESET_DMA1) {
5376                 /* sdma1 */
5377                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5378                 tmp |= SDMA_HALT;
5379                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5380         }
5381
5382         evergreen_mc_stop(rdev, &save);
5383         if (evergreen_mc_wait_for_idle(rdev)) {
5384                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5385         }
5386
5387         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5388                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5389
5390         if (reset_mask & RADEON_RESET_CP) {
5391                 grbm_soft_reset |= SOFT_RESET_CP;
5392
5393                 srbm_soft_reset |= SOFT_RESET_GRBM;
5394         }
5395
5396         if (reset_mask & RADEON_RESET_DMA)
5397                 srbm_soft_reset |= SOFT_RESET_SDMA;
5398
5399         if (reset_mask & RADEON_RESET_DMA1)
5400                 srbm_soft_reset |= SOFT_RESET_SDMA1;
5401
5402         if (reset_mask & RADEON_RESET_DISPLAY)
5403                 srbm_soft_reset |= SOFT_RESET_DC;
5404
5405         if (reset_mask & RADEON_RESET_RLC)
5406                 grbm_soft_reset |= SOFT_RESET_RLC;
5407
5408         if (reset_mask & RADEON_RESET_SEM)
5409                 srbm_soft_reset |= SOFT_RESET_SEM;
5410
5411         if (reset_mask & RADEON_RESET_IH)
5412                 srbm_soft_reset |= SOFT_RESET_IH;
5413
5414         if (reset_mask & RADEON_RESET_GRBM)
5415                 srbm_soft_reset |= SOFT_RESET_GRBM;
5416
5417         if (reset_mask & RADEON_RESET_VMC)
5418                 srbm_soft_reset |= SOFT_RESET_VMC;
5419
5420         if (!(rdev->flags & RADEON_IS_IGP)) {
5421                 if (reset_mask & RADEON_RESET_MC)
5422                         srbm_soft_reset |= SOFT_RESET_MC;
5423         }
5424
5425         if (grbm_soft_reset) {
5426                 tmp = RREG32(GRBM_SOFT_RESET);
5427                 tmp |= grbm_soft_reset;
5428                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5429                 WREG32(GRBM_SOFT_RESET, tmp);
5430                 tmp = RREG32(GRBM_SOFT_RESET);
5431
5432                 udelay(50);
5433
5434                 tmp &= ~grbm_soft_reset;
5435                 WREG32(GRBM_SOFT_RESET, tmp);
5436                 tmp = RREG32(GRBM_SOFT_RESET);
5437         }
5438
5439         if (srbm_soft_reset) {
5440                 tmp = RREG32(SRBM_SOFT_RESET);
5441                 tmp |= srbm_soft_reset;
5442                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5443                 WREG32(SRBM_SOFT_RESET, tmp);
5444                 tmp = RREG32(SRBM_SOFT_RESET);
5445
5446                 udelay(50);
5447
5448                 tmp &= ~srbm_soft_reset;
5449                 WREG32(SRBM_SOFT_RESET, tmp);
5450                 tmp = RREG32(SRBM_SOFT_RESET);
5451         }
5452
5453         /* Wait a little for things to settle down */
5454         udelay(50);
5455
5456         evergreen_mc_resume(rdev, &save);
5457         udelay(50);
5458
5459         cik_print_gpu_status_regs(rdev);
5460 }
5461
5462 struct kv_reset_save_regs {
5463         u32 gmcon_reng_execute;
5464         u32 gmcon_misc;
5465         u32 gmcon_misc3;
5466 };
5467
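/* save the GMCON registers and disable RENG execution before a pci config reset (APUs) */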
5468 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5469                                    struct kv_reset_save_regs *save)
5470 {
5471         save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5472         save->gmcon_misc = RREG32(GMCON_MISC);
5473         save->gmcon_misc3 = RREG32(GMCON_MISC3);
5474
5475         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5476         WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5477                                                 STCTRL_STUTTER_EN));
5478 }
5479
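/*
 * Reprogram the GMCON power-gating FSM with a fixed sequence of
 * config/write values and then restore the GMCON registers saved by
 * kv_save_regs_for_reset() (APUs).
 */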
5480 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5481                                       struct kv_reset_save_regs *save)
5482 {
5483         int i;
5484
5485         WREG32(GMCON_PGFSM_WRITE, 0);
5486         WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5487
5488         for (i = 0; i < 5; i++)
5489                 WREG32(GMCON_PGFSM_WRITE, 0);
5490
5491         WREG32(GMCON_PGFSM_WRITE, 0);
5492         WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5493
5494         for (i = 0; i < 5; i++)
5495                 WREG32(GMCON_PGFSM_WRITE, 0);
5496
5497         WREG32(GMCON_PGFSM_WRITE, 0x210000);
5498         WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5499
5500         for (i = 0; i < 5; i++)
5501                 WREG32(GMCON_PGFSM_WRITE, 0);
5502
5503         WREG32(GMCON_PGFSM_WRITE, 0x21003);
5504         WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5505
5506         for (i = 0; i < 5; i++)
5507                 WREG32(GMCON_PGFSM_WRITE, 0);
5508
5509         WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5510         WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5511
5512         for (i = 0; i < 5; i++)
5513                 WREG32(GMCON_PGFSM_WRITE, 0);
5514
5515         WREG32(GMCON_PGFSM_WRITE, 0);
5516         WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5517
5518         for (i = 0; i < 5; i++)
5519                 WREG32(GMCON_PGFSM_WRITE, 0);
5520
5521         WREG32(GMCON_PGFSM_WRITE, 0x420000);
5522         WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5523
5524         for (i = 0; i < 5; i++)
5525                 WREG32(GMCON_PGFSM_WRITE, 0);
5526
5527         WREG32(GMCON_PGFSM_WRITE, 0x120202);
5528         WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5529
5530         for (i = 0; i < 5; i++)
5531                 WREG32(GMCON_PGFSM_WRITE, 0);
5532
5533         WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5534         WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5535
5536         for (i = 0; i < 5; i++)
5537                 WREG32(GMCON_PGFSM_WRITE, 0);
5538
5539         WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5540         WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5541
5542         for (i = 0; i < 5; i++)
5543                 WREG32(GMCON_PGFSM_WRITE, 0);
5544
5545         WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5546         WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5547
5548         WREG32(GMCON_MISC3, save->gmcon_misc3);
5549         WREG32(GMCON_MISC, save->gmcon_misc);
5550         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5551 }
5552
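/**
 * cik_gpu_pci_config_reset - reset the asic via the pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Halts the CP, MEC, SDMA, and RLC engines, stops memory access,
 * issues a pci config reset, and waits for the asic to come back up.
 */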
5553 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5554 {
5555         struct evergreen_mc_save save;
5556         struct kv_reset_save_regs kv_save = { 0 };
5557         u32 tmp, i;
5558
5559         dev_info(rdev->dev, "GPU pci config reset\n");
5560
5561         /* disable dpm? */
5562
5563         /* disable cg/pg */
5564         cik_fini_pg(rdev);
5565         cik_fini_cg(rdev);
5566
5567         /* Disable GFX parsing/prefetching */
5568         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5569
5570         /* Disable MEC parsing/prefetching */
5571         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5572
5573         /* sdma0 */
5574         tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5575         tmp |= SDMA_HALT;
5576         WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5577         /* sdma1 */
5578         tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5579         tmp |= SDMA_HALT;
5580         WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5581         /* XXX other engines? */
5582
5583         /* halt the rlc, disable cp internal ints */
5584         cik_rlc_stop(rdev);
5585
5586         udelay(50);
5587
5588         /* disable mem access */
5589         evergreen_mc_stop(rdev, &save);
5590         if (evergreen_mc_wait_for_idle(rdev)) {
5591                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5592         }
5593
5594         if (rdev->flags & RADEON_IS_IGP)
5595                 kv_save_regs_for_reset(rdev, &kv_save);
5596
5597         /* disable BM */
5598         pci_clear_master(rdev->pdev);
5599         /* reset */
5600         radeon_pci_config_reset(rdev);
5601
5602         udelay(100);
5603
5604         /* wait for asic to come out of reset */
5605         for (i = 0; i < rdev->usec_timeout; i++) {
5606                 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5607                         break;
5608                 udelay(1);
5609         }
5610
5611         /* does asic init need to be run first??? */
5612         if (rdev->flags & RADEON_IS_IGP)
5613                 kv_restore_regs_for_reset(rdev, &kv_save);
5614 }
5615
5616 /**
5617  * cik_asic_reset - soft reset GPU
5618  *
5619  * @rdev: radeon_device pointer
5620  *
5621  * Look up which blocks are hung and attempt
5622  * to reset them.
5623  * Returns 0 for success.
5624  */
5625 int cik_asic_reset(struct radeon_device *rdev)
5626 {
5627         u32 reset_mask;
5628
5629         reset_mask = cik_gpu_check_soft_reset(rdev);
5630
5631         if (reset_mask)
5632                 r600_set_bios_scratch_engine_hung(rdev, true);
5633
5634         /* try soft reset */
5635         cik_gpu_soft_reset(rdev, reset_mask);
5636
5637         reset_mask = cik_gpu_check_soft_reset(rdev);
5638
5639         /* try pci config reset */
5640         if (reset_mask && radeon_hard_reset)
5641                 cik_gpu_pci_config_reset(rdev);
5642
5643         reset_mask = cik_gpu_check_soft_reset(rdev);
5644
5645         if (!reset_mask)
5646                 r600_set_bios_scratch_engine_hung(rdev, false);
5647
5648         return 0;
5649 }
5650
5651 /**
5652  * cik_gfx_is_lockup - check if the 3D engine is locked up
5653  *
5654  * @rdev: radeon_device pointer
5655  * @ring: radeon_ring structure holding ring information
5656  *
5657  * Check if the 3D engine is locked up (CIK).
5658  * Returns true if the engine is locked, false if not.
5659  */
5660 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5661 {
5662         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5663
5664         if (!(reset_mask & (RADEON_RESET_GFX |
5665                             RADEON_RESET_COMPUTE |
5666                             RADEON_RESET_CP))) {
5667                 radeon_ring_lockup_update(rdev, ring);
5668                 return false;
5669         }
5670         return radeon_ring_test_lockup(rdev, ring);
5671 }
5672
5673 /* MC */
5674 /**
5675  * cik_mc_program - program the GPU memory controller
5676  *
5677  * @rdev: radeon_device pointer
5678  *
5679  * Set the location of vram, gart, and AGP in the GPU's
5680  * physical address space (CIK).
5681  */
5682 static void cik_mc_program(struct radeon_device *rdev)
5683 {
5684         struct evergreen_mc_save save;
5685         u32 tmp;
5686         int i, j;
5687
5688         /* Initialize HDP */
5689         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5690                 WREG32((0x2c14 + j), 0x00000000);
5691                 WREG32((0x2c18 + j), 0x00000000);
5692                 WREG32((0x2c1c + j), 0x00000000);
5693                 WREG32((0x2c20 + j), 0x00000000);
5694                 WREG32((0x2c24 + j), 0x00000000);
5695         }
5696         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5697
5698         evergreen_mc_stop(rdev, &save);
5699         if (radeon_mc_wait_for_idle(rdev)) {
5700                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5701         }
5702         /* Lockout access through VGA aperture*/
5703         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5704         /* Update configuration */
5705         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5706                rdev->mc.vram_start >> 12);
5707         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5708                rdev->mc.vram_end >> 12);
5709         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5710                rdev->vram_scratch.gpu_addr >> 12);
5711         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5712         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5713         WREG32(MC_VM_FB_LOCATION, tmp);
5714         /* XXX double check these! */
5715         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5716         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5717         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5718         WREG32(MC_VM_AGP_BASE, 0);
5719         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5720         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5721         if (radeon_mc_wait_for_idle(rdev)) {
5722                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5723         }
5724         evergreen_mc_resume(rdev, &save);
5725         /* we need to own VRAM, so turn off the VGA renderer here
5726          * to stop it overwriting our objects */
5727         rv515_vga_render_disable(rdev);
5728 }
5729
5730 /**
5731  * cik_mc_init - initialize the memory controller driver params
5732  *
5733  * @rdev: radeon_device pointer
5734  *
5735  * Look up the amount of vram, vram width, and decide how to place
5736  * vram and gart within the GPU's physical address space (CIK).
5737  * Returns 0 for success.
5738  */
5739 static int cik_mc_init(struct radeon_device *rdev)
5740 {
5741         u32 tmp;
5742         int chansize, numchan;
5743
5744         /* Get VRAM information */
5745         rdev->mc.vram_is_ddr = true;
5746         tmp = RREG32(MC_ARB_RAMCFG);
5747         if (tmp & CHANSIZE_MASK) {
5748                 chansize = 64;
5749         } else {
5750                 chansize = 32;
5751         }
5752         tmp = RREG32(MC_SHARED_CHMAP);
5753         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5754         case 0:
5755         default:
5756                 numchan = 1;
5757                 break;
5758         case 1:
5759                 numchan = 2;
5760                 break;
5761         case 2:
5762                 numchan = 4;
5763                 break;
5764         case 3:
5765                 numchan = 8;
5766                 break;
5767         case 4:
5768                 numchan = 3;
5769                 break;
5770         case 5:
5771                 numchan = 6;
5772                 break;
5773         case 6:
5774                 numchan = 10;
5775                 break;
5776         case 7:
5777                 numchan = 12;
5778                 break;
5779         case 8:
5780                 numchan = 16;
5781                 break;
5782         }
5783         rdev->mc.vram_width = numchan * chansize;
5784         /* Could the aperture size report 0? */
5785         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5786         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5787         /* size in MB on CIK */
5788         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5789         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5790         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5791         si_vram_gtt_location(rdev, &rdev->mc);
5792         radeon_update_bandwidth_info(rdev);
5793
5794         return 0;
5795 }
5796
5797 /*
5798  * GART
5799  * VMID 0 is the physical GPU addresses as used by the kernel.
5800  * VMIDs 1-15 are used for userspace clients and are handled
5801  * by the radeon vm/hsa code.
5802  */
5803 /**
5804  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5805  *
5806  * @rdev: radeon_device pointer
5807  *
5808  * Flush the TLB for the VMID 0 page table (CIK).
5809  */
5810 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5811 {
5812         /* flush hdp cache */
5813         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5814
5815         /* bits 0-15 are the VM contexts0-15 */
5816         WREG32(VM_INVALIDATE_REQUEST, 0x1);
5817 }
5818
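/*
 * Set up the shader memory apertures for VMIDs 8-15, which are
 * reserved for amdkfd compute clients.
 */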
5819 static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5820 {
5821         int i;
5822         uint32_t sh_mem_bases, sh_mem_config;
5823
5824         sh_mem_bases = 0x6000 | 0x6000 << 16;
5825         sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5826         sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
5827
5828         mutex_lock(&rdev->srbm_mutex);
5829         for (i = 8; i < 16; i++) {
5830                 cik_srbm_select(rdev, 0, 0, 0, i);
5831                 /* CP and shaders */
5832                 WREG32(SH_MEM_CONFIG, sh_mem_config);
5833                 WREG32(SH_MEM_APE1_BASE, 1);
5834                 WREG32(SH_MEM_APE1_LIMIT, 0);
5835                 WREG32(SH_MEM_BASES, sh_mem_bases);
5836         }
5837         cik_srbm_select(rdev, 0, 0, 0, 0);
5838         mutex_unlock(&rdev->srbm_mutex);
5839 }
5840
5841 /**
5842  * cik_pcie_gart_enable - gart enable
5843  *
5844  * @rdev: radeon_device pointer
5845  *
5846  * This sets up the TLBs, programs the page tables for VMID0,
5847  * sets up the hw for VMIDs 1-15 which are allocated on
5848  * demand, and sets up the global locations for the LDS, GDS,
5849  * and GPUVM for FSA64 clients (CIK).
5850  * Returns 0 for success, errors for failure.
5851  */
5852 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5853 {
5854         int r, i;
5855
5856         if (rdev->gart.robj == NULL) {
5857                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5858                 return -EINVAL;
5859         }
5860         r = radeon_gart_table_vram_pin(rdev);
5861         if (r)
5862                 return r;
5863         /* Setup TLB control */
5864         WREG32(MC_VM_MX_L1_TLB_CNTL,
5865                (0xA << 7) |
5866                ENABLE_L1_TLB |
5867                ENABLE_L1_FRAGMENT_PROCESSING |
5868                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5869                ENABLE_ADVANCED_DRIVER_MODEL |
5870                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5871         /* Setup L2 cache */
5872         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5873                ENABLE_L2_FRAGMENT_PROCESSING |
5874                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5875                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5876                EFFECTIVE_L2_QUEUE_SIZE(7) |
5877                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5878         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5879         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5880                BANK_SELECT(4) |
5881                L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5882         /* setup context0 */
5883         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5884         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5885         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5886         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5887                         (u32)(rdev->dummy_page.addr >> 12));
5888         WREG32(VM_CONTEXT0_CNTL2, 0);
5889         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5890                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5891
5892         WREG32(0x15D4, 0);
5893         WREG32(0x15D8, 0);
5894         WREG32(0x15DC, 0);
5895
5896         /* restore context1-15 */
5897         /* set vm size, must be a multiple of 4 */
5898         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5899         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5900         for (i = 1; i < 16; i++) {
5901                 if (i < 8)
5902                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5903                                rdev->vm_manager.saved_table_addr[i]);
5904                 else
5905                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5906                                rdev->vm_manager.saved_table_addr[i]);
5907         }
5908
5909         /* enable context1-15 */
5910         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5911                (u32)(rdev->dummy_page.addr >> 12));
5912         WREG32(VM_CONTEXT1_CNTL2, 4);
5913         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5914                                 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5915                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5916                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5917                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5918                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5919                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5920                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5921                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5922                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5923                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5924                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5925                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5926                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5927
5928         if (rdev->family == CHIP_KAVERI) {
5929                 u32 tmp = RREG32(CHUB_CONTROL);
5930                 tmp &= ~BYPASS_VM;
5931                 WREG32(CHUB_CONTROL, tmp);
5932         }
5933
5934         /* XXX SH_MEM regs */
5935         /* where to put LDS, scratch, GPUVM in FSA64 space */
5936         mutex_lock(&rdev->srbm_mutex);
5937         for (i = 0; i < 16; i++) {
5938                 cik_srbm_select(rdev, 0, 0, 0, i);
5939                 /* CP and shaders */
5940                 WREG32(SH_MEM_CONFIG, 0);
5941                 WREG32(SH_MEM_APE1_BASE, 1);
5942                 WREG32(SH_MEM_APE1_LIMIT, 0);
5943                 WREG32(SH_MEM_BASES, 0);
5944                 /* SDMA GFX */
5945                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5946                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5947                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5948                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5949                 /* XXX SDMA RLC - todo */
5950         }
5951         cik_srbm_select(rdev, 0, 0, 0, 0);
5952         mutex_unlock(&rdev->srbm_mutex);
5953
5954         cik_pcie_init_compute_vmid(rdev);
5955
5956         cik_pcie_gart_tlb_flush(rdev);
5957         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5958                  (unsigned)(rdev->mc.gtt_size >> 20),
5959                  (unsigned long long)rdev->gart.table_addr);
5960         rdev->gart.ready = true;
5961         return 0;
5962 }
5963
5964 /**
5965  * cik_pcie_gart_disable - gart disable
5966  *
5967  * @rdev: radeon_device pointer
5968  *
5969  * This disables all the VM page tables (CIK).
5970  */
5971 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5972 {
5973         unsigned i;
5974
5975         for (i = 1; i < 16; ++i) {
5976                 uint32_t reg;
5977                 if (i < 8)
5978                         reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5979                 else
5980                         reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5981                 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5982         }
5983
5984         /* Disable all tables */
5985         WREG32(VM_CONTEXT0_CNTL, 0);
5986         WREG32(VM_CONTEXT1_CNTL, 0);
5987         /* Setup TLB control */
5988         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5989                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5990         /* Setup L2 cache */
5991         WREG32(VM_L2_CNTL,
5992                ENABLE_L2_FRAGMENT_PROCESSING |
5993                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5994                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5995                EFFECTIVE_L2_QUEUE_SIZE(7) |
5996                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5997         WREG32(VM_L2_CNTL2, 0);
5998         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5999                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
6000         radeon_gart_table_vram_unpin(rdev);
6001 }
6002
6003 /**
6004  * cik_pcie_gart_fini - vm fini callback
6005  *
6006  * @rdev: radeon_device pointer
6007  *
6008  * Tears down the driver GART/VM setup (CIK).
6009  */
6010 static void cik_pcie_gart_fini(struct radeon_device *rdev)
6011 {
6012         cik_pcie_gart_disable(rdev);
6013         radeon_gart_table_vram_free(rdev);
6014         radeon_gart_fini(rdev);
6015 }
6016
6017 /* vm parser */
6018 /**
6019  * cik_ib_parse - vm ib_parse callback
6020  *
6021  * @rdev: radeon_device pointer
6022  * @ib: indirect buffer pointer
6023  *
6024  * CIK uses hw IB checking so this is a nop (CIK).
6025  */
6026 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
6027 {
6028         return 0;
6029 }
6030
6031 /*
6032  * vm
6033  * VMID 0 is the physical GPU addresses as used by the kernel.
6034  * VMIDs 1-15 are used for userspace clients and are handled
6035  * by the radeon vm/hsa code.
6036  */
6037 /**
6038  * cik_vm_init - cik vm init callback
6039  *
6040  * @rdev: radeon_device pointer
6041  *
6042  * Inits cik specific vm parameters (number of VMs, base of vram for
6043  * VMIDs 1-15) (CIK).
6044  * Returns 0 for success.
6045  */
6046 int cik_vm_init(struct radeon_device *rdev)
6047 {
6048         /*
6049          * number of VMs
6050          * VMID 0 is reserved for System
6051          * radeon graphics/compute will use VMIDs 1-7
6052          * amdkfd will use VMIDs 8-15
6053          */
6054         rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
6055         /* base offset of vram pages */
6056         if (rdev->flags & RADEON_IS_IGP) {
6057                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
6058                 tmp <<= 22;
6059                 rdev->vm_manager.vram_base_offset = tmp;
6060         } else
6061                 rdev->vm_manager.vram_base_offset = 0;
6062
6063         return 0;
6064 }
6065
6066 /**
6067  * cik_vm_fini - cik vm fini callback
6068  *
6069  * @rdev: radeon_device pointer
6070  *
6071  * Tear down any asic specific VM setup (CIK).
6072  */
6073 void cik_vm_fini(struct radeon_device *rdev)
6074 {
6075 }
6076
6077 /**
6078  * cik_vm_decode_fault - print human readable fault info
6079  *
6080  * @rdev: radeon_device pointer
6081  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
6082  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
6083  *
6084  * Print human readable fault information (CIK).
6085  */
6086 static void cik_vm_decode_fault(struct radeon_device *rdev,
6087                                 u32 status, u32 addr, u32 mc_client)
6088 {
6089         u32 mc_id;
6090         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
6091         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
6092         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
6093                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
6094
6095         if (rdev->family == CHIP_HAWAII)
6096                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6097         else
6098                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6099
6100         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
6101                protections, vmid, addr,
6102                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
6103                block, mc_client, mc_id);
6104 }
6105
6106 /**
6107  * cik_vm_flush - cik vm flush using the CP
6108  *
6109  * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @vm_id: VMID to flush
 * @pd_addr: address of the page directory for this VMID
6110  *
6111  * Update the page table base and flush the VM TLB
6112  * using the CP (CIK).
6113  */
6114 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
6115                   unsigned vm_id, uint64_t pd_addr)
6116 {
6117         int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
6118
6119         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6120         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6121                                  WRITE_DATA_DST_SEL(0)));
6122         if (vm_id < 8) {
6123                 radeon_ring_write(ring,
6124                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
6125         } else {
6126                 radeon_ring_write(ring,
6127                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
6128         }
6129         radeon_ring_write(ring, 0);
6130         radeon_ring_write(ring, pd_addr >> 12);
6131
6132         /* update SH_MEM_* regs */
6133         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6134         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6135                                  WRITE_DATA_DST_SEL(0)));
6136         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6137         radeon_ring_write(ring, 0);
6138         radeon_ring_write(ring, VMID(vm_id));
6139
6140         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
6141         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6142                                  WRITE_DATA_DST_SEL(0)));
6143         radeon_ring_write(ring, SH_MEM_BASES >> 2);
6144         radeon_ring_write(ring, 0);
6145
6146         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
6147         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
6148         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
6149         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
6150
6151         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6152         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6153                                  WRITE_DATA_DST_SEL(0)));
6154         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6155         radeon_ring_write(ring, 0);
6156         radeon_ring_write(ring, VMID(0));
6157
6158         /* HDP flush */
6159         cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
6160
6161         /* bits 0-15 are the VM contexts 0-15 */
6162         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6163         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6164                                  WRITE_DATA_DST_SEL(0)));
6165         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6166         radeon_ring_write(ring, 0);
6167         radeon_ring_write(ring, 1 << vm_id);
6168
6169         /* wait for the invalidate to complete */
6170         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6171         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6172                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6173                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6174         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6175         radeon_ring_write(ring, 0);
6176         radeon_ring_write(ring, 0); /* ref */
6177         radeon_ring_write(ring, 0); /* mask */
6178         radeon_ring_write(ring, 0x20); /* poll interval */
6179
6180         /* compute doesn't have PFP */
6181         if (usepfp) {
6182                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6183                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6184                 radeon_ring_write(ring, 0x0);
6185         }
6186 }
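/*
 * Illustrative sketch (not driver code): every register update above uses
 * the same five-dword PACKET3_WRITE_DATA form.  A hypothetical helper that
 * captures the pattern might look like the following; the name and
 * signature are assumptions made for illustration only.
 */
#if 0
static void cik_ring_wreg(struct radeon_ring *ring, u32 reg, u32 val,
                          int usepfp)
{
        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
                                 WRITE_DATA_DST_SEL(0)));
        radeon_ring_write(ring, reg >> 2);      /* dword offset of the register */
        radeon_ring_write(ring, 0);             /* upper address bits, 0 for regs */
        radeon_ring_write(ring, val);           /* value to write */
}
#endif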
6187
6188 /*
6189  * RLC
6190  * The RLC is a multi-purpose microengine that handles a
6191  * variety of functions, the most important of which is
6192  * the interrupt controller.
6193  */
6194 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6195                                           bool enable)
6196 {
6197         u32 tmp = RREG32(CP_INT_CNTL_RING0);
6198
6199         if (enable)
6200                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6201         else
6202                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6203         WREG32(CP_INT_CNTL_RING0, tmp);
6204 }
6205
6206 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6207 {
6208         u32 tmp;
6209
6210         tmp = RREG32(RLC_LB_CNTL);
6211         if (enable)
6212                 tmp |= LOAD_BALANCE_ENABLE;
6213         else
6214                 tmp &= ~LOAD_BALANCE_ENABLE;
6215         WREG32(RLC_LB_CNTL, tmp);
6216 }
6217
6218 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
6219 {
6220         u32 i, j, k;
6221         u32 mask;
6222
6223         mutex_lock(&rdev->grbm_idx_mutex);
6224         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6225                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6226                         cik_select_se_sh(rdev, i, j);
6227                         for (k = 0; k < rdev->usec_timeout; k++) {
6228                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
6229                                         break;
6230                                 udelay(1);
6231                         }
6232                 }
6233         }
6234         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6235         mutex_unlock(&rdev->grbm_idx_mutex);
6236
6237         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
6238         for (k = 0; k < rdev->usec_timeout; k++) {
6239                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
6240                         break;
6241                 udelay(1);
6242         }
6243 }
6244
6245 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6246 {
6247         u32 tmp;
6248
6249         tmp = RREG32(RLC_CNTL);
6250         if (tmp != rlc)
6251                 WREG32(RLC_CNTL, rlc);
6252 }
6253
6254 static u32 cik_halt_rlc(struct radeon_device *rdev)
6255 {
6256         u32 data, orig;
6257
6258         orig = data = RREG32(RLC_CNTL);
6259
6260         if (data & RLC_ENABLE) {
6261                 u32 i;
6262
6263                 data &= ~RLC_ENABLE;
6264                 WREG32(RLC_CNTL, data);
6265
6266                 for (i = 0; i < rdev->usec_timeout; i++) {
6267                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
6268                                 break;
6269                         udelay(1);
6270                 }
6271
6272                 cik_wait_for_rlc_serdes(rdev);
6273         }
6274
6275         return orig;
6276 }
6277
6278 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
6279 {
6280         u32 tmp, i, mask;
6281
6282         tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
6283         WREG32(RLC_GPR_REG2, tmp);
6284
6285         mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
6286         for (i = 0; i < rdev->usec_timeout; i++) {
6287                 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
6288                         break;
6289                 udelay(1);
6290         }
6291
6292         for (i = 0; i < rdev->usec_timeout; i++) {
6293                 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
6294                         break;
6295                 udelay(1);
6296         }
6297 }
6298
6299 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6300 {
6301         u32 tmp;
6302
6303         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6304         WREG32(RLC_GPR_REG2, tmp);
6305 }
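/*
 * Illustrative usage sketch (not from the original source): a caller is
 * expected to bracket clock/power-gating register updates with the
 * enter/exit handshake so the RLC is quiescent while they change, e.g.:
 *
 *      cik_enter_rlc_safe_mode(rdev);
 *      ... reprogram CG/PG related registers ...
 *      cik_exit_rlc_safe_mode(rdev);
 */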
6306
6307 /**
6308  * cik_rlc_stop - stop the RLC ME
6309  *
6310  * @rdev: radeon_device pointer
6311  *
6312  * Halt the RLC ME (MicroEngine) (CIK).
6313  */
6314 static void cik_rlc_stop(struct radeon_device *rdev)
6315 {
6316         WREG32(RLC_CNTL, 0);
6317
6318         cik_enable_gui_idle_interrupt(rdev, false);
6319
6320         cik_wait_for_rlc_serdes(rdev);
6321 }
6322
6323 /**
6324  * cik_rlc_start - start the RLC ME
6325  *
6326  * @rdev: radeon_device pointer
6327  *
6328  * Unhalt the RLC ME (MicroEngine) (CIK).
6329  */
6330 static void cik_rlc_start(struct radeon_device *rdev)
6331 {
6332         WREG32(RLC_CNTL, RLC_ENABLE);
6333
6334         cik_enable_gui_idle_interrupt(rdev, true);
6335
6336         udelay(50);
6337 }
6338
6339 /**
6340  * cik_rlc_resume - setup the RLC hw
6341  *
6342  * @rdev: radeon_device pointer
6343  *
6344  * Initialize the RLC registers, load the ucode,
6345  * and start the RLC (CIK).
6346  * Returns 0 for success, -EINVAL if the ucode is not available.
6347  */
6348 static int cik_rlc_resume(struct radeon_device *rdev)
6349 {
6350         u32 i, size, tmp;
6351
6352         if (!rdev->rlc_fw)
6353                 return -EINVAL;
6354
6355         cik_rlc_stop(rdev);
6356
6357         /* disable CG */
6358         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6359         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6360
6361         si_rlc_reset(rdev);
6362
6363         cik_init_pg(rdev);
6364
6365         cik_init_cg(rdev);
6366
6367         WREG32(RLC_LB_CNTR_INIT, 0);
6368         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6369
6370         mutex_lock(&rdev->grbm_idx_mutex);
6371         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6372         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6373         WREG32(RLC_LB_PARAMS, 0x00600408);
6374         WREG32(RLC_LB_CNTL, 0x80000004);
6375         mutex_unlock(&rdev->grbm_idx_mutex);
6376
6377         WREG32(RLC_MC_CNTL, 0);
6378         WREG32(RLC_UCODE_CNTL, 0);
6379
6380         if (rdev->new_fw) {
6381                 const struct rlc_firmware_header_v1_0 *hdr =
6382                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6383                 const __le32 *fw_data = (const __le32 *)
6384                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6385
6386                 radeon_ucode_print_rlc_hdr(&hdr->header);
6387
6388                 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6389                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6390                 for (i = 0; i < size; i++)
6391                         WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6392                 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6393         } else {
6394                 const __be32 *fw_data;
6395
6396                 switch (rdev->family) {
6397                 case CHIP_BONAIRE:
6398                 case CHIP_HAWAII:
6399                 default:
6400                         size = BONAIRE_RLC_UCODE_SIZE;
6401                         break;
6402                 case CHIP_KAVERI:
6403                         size = KV_RLC_UCODE_SIZE;
6404                         break;
6405                 case CHIP_KABINI:
6406                         size = KB_RLC_UCODE_SIZE;
6407                         break;
6408                 case CHIP_MULLINS:
6409                         size = ML_RLC_UCODE_SIZE;
6410                         break;
6411                 }
6412
6413                 fw_data = (const __be32 *)rdev->rlc_fw->data;
6414                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6415                 for (i = 0; i < size; i++)
6416                         WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6417                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6418         }
6419
6420         /* XXX - find out what chips support lbpw */
6421         cik_enable_lbpw(rdev, false);
6422
6423         if (rdev->family == CHIP_BONAIRE)
6424                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
6425
6426         cik_rlc_start(rdev);
6427
6428         return 0;
6429 }
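/*
 * Illustrative note (not from the original source): the two branches above
 * reflect the two firmware packagings.  "New" images are little-endian and
 * carry a header, so the payload starts at data + ucode_array_offset_bytes
 * and its dword count comes from ucode_size_bytes / 4; legacy images are
 * raw big-endian words with a hard-coded per-family size, hence the
 * different endianness helpers and the switch on rdev->family.
 */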
6430
6431 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6432 {
6433         u32 data, orig, tmp, tmp2;
6434
6435         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6436
6437         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6438                 cik_enable_gui_idle_interrupt(rdev, true);
6439
6440                 tmp = cik_halt_rlc(rdev);
6441
6442                 mutex_lock(&rdev->grbm_idx_mutex);
6443                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6444                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6445                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6446                 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6447                 WREG32(RLC_SERDES_WR_CTRL, tmp2);
6448                 mutex_unlock(&rdev->grbm_idx_mutex);
6449
6450                 cik_update_rlc(rdev, tmp);
6451
6452                 data |= CGCG_EN | CGLS_EN;
6453         } else {
6454                 cik_enable_gui_idle_interrupt(rdev, false);
6455
6456                 RREG32(CB_CGTT_SCLK_CTRL);
6457                 RREG32(CB_CGTT_SCLK_CTRL);
6458                 RREG32(CB_CGTT_SCLK_CTRL);
6459                 RREG32(CB_CGTT_SCLK_CTRL);
6460
6461                 data &= ~(CGCG_EN | CGLS_EN);
6462         }
6463
6464         if (orig != data)
6465                 WREG32(RLC_CGCG_CGLS_CTRL, data);
6466
6467 }
6468
6469 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6470 {
6471         u32 data, orig, tmp = 0;
6472
6473         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6474                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6475                         if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6476                                 orig = data = RREG32(CP_MEM_SLP_CNTL);
6477                                 data |= CP_MEM_LS_EN;
6478                                 if (orig != data)
6479                                         WREG32(CP_MEM_SLP_CNTL, data);
6480                         }
6481                 }
6482
6483                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6484                 data |= 0x00000001;
6485                 data &= 0xfffffffd;
6486                 if (orig != data)
6487                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6488
6489                 tmp = cik_halt_rlc(rdev);
6490
6491                 mutex_lock(&rdev->grbm_idx_mutex);
6492                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6493                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6494                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6495                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6496                 WREG32(RLC_SERDES_WR_CTRL, data);
6497                 mutex_unlock(&rdev->grbm_idx_mutex);
6498
6499                 cik_update_rlc(rdev, tmp);
6500
6501                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6502                         orig = data = RREG32(CGTS_SM_CTRL_REG);
6503                         data &= ~SM_MODE_MASK;
6504                         data |= SM_MODE(0x2);
6505                         data |= SM_MODE_ENABLE;
6506                         data &= ~CGTS_OVERRIDE;
6507                         if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6508                             (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6509                                 data &= ~CGTS_LS_OVERRIDE;
6510                         data &= ~ON_MONITOR_ADD_MASK;
6511                         data |= ON_MONITOR_ADD_EN;
6512                         data |= ON_MONITOR_ADD(0x96);
6513                         if (orig != data)
6514                                 WREG32(CGTS_SM_CTRL_REG, data);
6515                 }
6516         } else {
6517                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6518                 data |= 0x00000003;
6519                 if (orig != data)
6520                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6521
6522                 data = RREG32(RLC_MEM_SLP_CNTL);
6523                 if (data & RLC_MEM_LS_EN) {
6524                         data &= ~RLC_MEM_LS_EN;
6525                         WREG32(RLC_MEM_SLP_CNTL, data);
6526                 }
6527
6528                 data = RREG32(CP_MEM_SLP_CNTL);
6529                 if (data & CP_MEM_LS_EN) {
6530                         data &= ~CP_MEM_LS_EN;
6531                         WREG32(CP_MEM_SLP_CNTL, data);
6532                 }
6533
6534                 orig = data = RREG32(CGTS_SM_CTRL_REG);
6535                 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6536                 if (orig != data)
6537                         WREG32(CGTS_SM_CTRL_REG, data);
6538
6539                 tmp = cik_halt_rlc(rdev);
6540
6541                 mutex_lock(&rdev->grbm_idx_mutex);
6542                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6543                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6544                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6545                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6546                 WREG32(RLC_SERDES_WR_CTRL, data);
6547                 mutex_unlock(&rdev->grbm_idx_mutex);
6548
6549                 cik_update_rlc(rdev, tmp);
6550         }
6551 }
6552
6553 static const u32 mc_cg_registers[] =
6554 {
6555         MC_HUB_MISC_HUB_CG,
6556         MC_HUB_MISC_SIP_CG,
6557         MC_HUB_MISC_VM_CG,
6558         MC_XPB_CLK_GAT,
6559         ATC_MISC_CG,
6560         MC_CITF_MISC_WR_CG,
6561         MC_CITF_MISC_RD_CG,
6562         MC_CITF_MISC_VM_CG,
6563         VM_L2_CG,
6564 };
6565
6566 static void cik_enable_mc_ls(struct radeon_device *rdev,
6567                              bool enable)
6568 {
6569         int i;
6570         u32 orig, data;
6571
6572         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6573                 orig = data = RREG32(mc_cg_registers[i]);
6574                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6575                         data |= MC_LS_ENABLE;
6576                 else
6577                         data &= ~MC_LS_ENABLE;
6578                 if (data != orig)
6579                         WREG32(mc_cg_registers[i], data);
6580         }
6581 }
6582
6583 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6584                                bool enable)
6585 {
6586         int i;
6587         u32 orig, data;
6588
6589         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6590                 orig = data = RREG32(mc_cg_registers[i]);
6591                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6592                         data |= MC_CG_ENABLE;
6593                 else
6594                         data &= ~MC_CG_ENABLE;
6595                 if (data != orig)
6596                         WREG32(mc_cg_registers[i], data);
6597         }
6598 }
6599
6600 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6601                                  bool enable)
6602 {
6603         u32 orig, data;
6604
6605         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6606                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6607                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6608         } else {
6609                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6610                 data |= 0xff000000;
6611                 if (data != orig)
6612                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6613
6614                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6615                 data |= 0xff000000;
6616                 if (data != orig)
6617                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6618         }
6619 }
6620
6621 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6622                                  bool enable)
6623 {
6624         u32 orig, data;
6625
6626         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6627                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6628                 data |= 0x100;
6629                 if (orig != data)
6630                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6631
6632                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6633                 data |= 0x100;
6634                 if (orig != data)
6635                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6636         } else {
6637                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6638                 data &= ~0x100;
6639                 if (orig != data)
6640                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6641
6642                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6643                 data &= ~0x100;
6644                 if (orig != data)
6645                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6646         }
6647 }
6648
6649 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6650                                 bool enable)
6651 {
6652         u32 orig, data;
6653
6654         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6655                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6656                 data = 0xfff;
6657                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6658
6659                 orig = data = RREG32(UVD_CGC_CTRL);
6660                 data |= DCM;
6661                 if (orig != data)
6662                         WREG32(UVD_CGC_CTRL, data);
6663         } else {
6664                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6665                 data &= ~0xfff;
6666                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6667
6668                 orig = data = RREG32(UVD_CGC_CTRL);
6669                 data &= ~DCM;
6670                 if (orig != data)
6671                         WREG32(UVD_CGC_CTRL, data);
6672         }
6673 }
6674
6675 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6676                                bool enable)
6677 {
6678         u32 orig, data;
6679
6680         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6681
6682         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6683                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6684                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6685         else
6686                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6687                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6688
6689         if (orig != data)
6690                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6691 }
6692
6693 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6694                                 bool enable)
6695 {
6696         u32 orig, data;
6697
6698         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6699
6700         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6701                 data &= ~CLOCK_GATING_DIS;
6702         else
6703                 data |= CLOCK_GATING_DIS;
6704
6705         if (orig != data)
6706                 WREG32(HDP_HOST_PATH_CNTL, data);
6707 }
6708
6709 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6710                               bool enable)
6711 {
6712         u32 orig, data;
6713
6714         orig = data = RREG32(HDP_MEM_POWER_LS);
6715
6716         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6717                 data |= HDP_LS_ENABLE;
6718         else
6719                 data &= ~HDP_LS_ENABLE;
6720
6721         if (orig != data)
6722                 WREG32(HDP_MEM_POWER_LS, data);
6723 }
6724
6725 void cik_update_cg(struct radeon_device *rdev,
6726                    u32 block, bool enable)
6727 {
6728
6729         if (block & RADEON_CG_BLOCK_GFX) {
6730                 cik_enable_gui_idle_interrupt(rdev, false);
6731                 /* order matters! */
6732                 if (enable) {
6733                         cik_enable_mgcg(rdev, true);
6734                         cik_enable_cgcg(rdev, true);
6735                 } else {
6736                         cik_enable_cgcg(rdev, false);
6737                         cik_enable_mgcg(rdev, false);
6738                 }
6739                 cik_enable_gui_idle_interrupt(rdev, true);
6740         }
6741
6742         if (block & RADEON_CG_BLOCK_MC) {
6743                 if (!(rdev->flags & RADEON_IS_IGP)) {
6744                         cik_enable_mc_mgcg(rdev, enable);
6745                         cik_enable_mc_ls(rdev, enable);
6746                 }
6747         }
6748
6749         if (block & RADEON_CG_BLOCK_SDMA) {
6750                 cik_enable_sdma_mgcg(rdev, enable);
6751                 cik_enable_sdma_mgls(rdev, enable);
6752         }
6753
6754         if (block & RADEON_CG_BLOCK_BIF) {
6755                 cik_enable_bif_mgls(rdev, enable);
6756         }
6757
6758         if (block & RADEON_CG_BLOCK_UVD) {
6759                 if (rdev->has_uvd)
6760                         cik_enable_uvd_mgcg(rdev, enable);
6761         }
6762
6763         if (block & RADEON_CG_BLOCK_HDP) {
6764                 cik_enable_hdp_mgcg(rdev, enable);
6765                 cik_enable_hdp_ls(rdev, enable);
6766         }
6767
6768         if (block & RADEON_CG_BLOCK_VCE) {
6769                 vce_v2_0_enable_mgcg(rdev, enable);
6770         }
6771 }
6772
6773 static void cik_init_cg(struct radeon_device *rdev)
6774 {
6775
6776         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6777
6778         if (rdev->has_uvd)
6779                 si_init_uvd_internal_cg(rdev);
6780
6781         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6782                              RADEON_CG_BLOCK_SDMA |
6783                              RADEON_CG_BLOCK_BIF |
6784                              RADEON_CG_BLOCK_UVD |
6785                              RADEON_CG_BLOCK_HDP), true);
6786 }
6787
6788 static void cik_fini_cg(struct radeon_device *rdev)
6789 {
6790         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6791                              RADEON_CG_BLOCK_SDMA |
6792                              RADEON_CG_BLOCK_BIF |
6793                              RADEON_CG_BLOCK_UVD |
6794                              RADEON_CG_BLOCK_HDP), false);
6795
6796         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6797 }
6798
6799 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6800                                           bool enable)
6801 {
6802         u32 data, orig;
6803
6804         orig = data = RREG32(RLC_PG_CNTL);
6805         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6806                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6807         else
6808                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6809         if (orig != data)
6810                 WREG32(RLC_PG_CNTL, data);
6811 }
6812
6813 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6814                                           bool enable)
6815 {
6816         u32 data, orig;
6817
6818         orig = data = RREG32(RLC_PG_CNTL);
6819         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6820                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6821         else
6822                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6823         if (orig != data)
6824                 WREG32(RLC_PG_CNTL, data);
6825 }
6826
6827 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6828 {
6829         u32 data, orig;
6830
6831         orig = data = RREG32(RLC_PG_CNTL);
6832         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6833                 data &= ~DISABLE_CP_PG;
6834         else
6835                 data |= DISABLE_CP_PG;
6836         if (orig != data)
6837                 WREG32(RLC_PG_CNTL, data);
6838 }
6839
6840 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6841 {
6842         u32 data, orig;
6843
6844         orig = data = RREG32(RLC_PG_CNTL);
6845         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6846                 data &= ~DISABLE_GDS_PG;
6847         else
6848                 data |= DISABLE_GDS_PG;
6849         if (orig != data)
6850                 WREG32(RLC_PG_CNTL, data);
6851 }
6852
6853 #define CP_ME_TABLE_SIZE    96
6854 #define CP_ME_TABLE_OFFSET  2048
6855 #define CP_MEC_TABLE_OFFSET 4096
6856
6857 void cik_init_cp_pg_table(struct radeon_device *rdev)
6858 {
6859         volatile u32 *dst_ptr;
6860         int me, i, max_me = 4;
6861         u32 bo_offset = 0;
6862         u32 table_offset, table_size;
6863
6864         if (rdev->family == CHIP_KAVERI)
6865                 max_me = 5;
6866
6867         if (rdev->rlc.cp_table_ptr == NULL)
6868                 return;
6869
6870         /* write the cp table buffer */
6871         dst_ptr = rdev->rlc.cp_table_ptr;
6872         for (me = 0; me < max_me; me++) {
6873                 if (rdev->new_fw) {
6874                         const __le32 *fw_data;
6875                         const struct gfx_firmware_header_v1_0 *hdr;
6876
6877                         if (me == 0) {
6878                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6879                                 fw_data = (const __le32 *)
6880                                         (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6881                                 table_offset = le32_to_cpu(hdr->jt_offset);
6882                                 table_size = le32_to_cpu(hdr->jt_size);
6883                         } else if (me == 1) {
6884                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6885                                 fw_data = (const __le32 *)
6886                                         (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6887                                 table_offset = le32_to_cpu(hdr->jt_offset);
6888                                 table_size = le32_to_cpu(hdr->jt_size);
6889                         } else if (me == 2) {
6890                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6891                                 fw_data = (const __le32 *)
6892                                         (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6893                                 table_offset = le32_to_cpu(hdr->jt_offset);
6894                                 table_size = le32_to_cpu(hdr->jt_size);
6895                         } else if (me == 3) {
6896                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6897                                 fw_data = (const __le32 *)
6898                                         (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6899                                 table_offset = le32_to_cpu(hdr->jt_offset);
6900                                 table_size = le32_to_cpu(hdr->jt_size);
6901                         } else {
6902                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6903                                 fw_data = (const __le32 *)
6904                                         (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6905                                 table_offset = le32_to_cpu(hdr->jt_offset);
6906                                 table_size = le32_to_cpu(hdr->jt_size);
6907                         }
6908
6909                         for (i = 0; i < table_size; i ++) {
6910                                 dst_ptr[bo_offset + i] =
6911                                         cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6912                         }
6913                         bo_offset += table_size;
6914                 } else {
6915                         const __be32 *fw_data;
6916                         table_size = CP_ME_TABLE_SIZE;
6917
6918                         if (me == 0) {
6919                                 fw_data = (const __be32 *)rdev->ce_fw->data;
6920                                 table_offset = CP_ME_TABLE_OFFSET;
6921                         } else if (me == 1) {
6922                                 fw_data = (const __be32 *)rdev->pfp_fw->data;
6923                                 table_offset = CP_ME_TABLE_OFFSET;
6924                         } else if (me == 2) {
6925                                 fw_data = (const __be32 *)rdev->me_fw->data;
6926                                 table_offset = CP_ME_TABLE_OFFSET;
6927                         } else {
6928                                 fw_data = (const __be32 *)rdev->mec_fw->data;
6929                                 table_offset = CP_MEC_TABLE_OFFSET;
6930                         }
6931
6932                         for (i = 0; i < table_size; i ++) {
6933                                 dst_ptr[bo_offset + i] =
6934                                         cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6935                         }
6936                         bo_offset += table_size;
6937                 }
6938         }
6939 }
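/*
 * Illustrative note (not from the original source): the loop above packs
 * the jump tables of the CE, PFP, ME and MEC microcode (plus MEC2 on
 * Kaveri, hence max_me = 5) back to back into the cp_table buffer; the
 * RLC is later pointed at this buffer via RLC_CP_TABLE_RESTORE in
 * cik_init_gfx_cgpg() so CP state can be restored when gfx powergating
 * kicks in.
 */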
6940
6941 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6942                                 bool enable)
6943 {
6944         u32 data, orig;
6945
6946         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6947                 orig = data = RREG32(RLC_PG_CNTL);
6948                 data |= GFX_PG_ENABLE;
6949                 if (orig != data)
6950                         WREG32(RLC_PG_CNTL, data);
6951
6952                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6953                 data |= AUTO_PG_EN;
6954                 if (orig != data)
6955                         WREG32(RLC_AUTO_PG_CTRL, data);
6956         } else {
6957                 orig = data = RREG32(RLC_PG_CNTL);
6958                 data &= ~GFX_PG_ENABLE;
6959                 if (orig != data)
6960                         WREG32(RLC_PG_CNTL, data);
6961
6962                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6963                 data &= ~AUTO_PG_EN;
6964                 if (orig != data)
6965                         WREG32(RLC_AUTO_PG_CTRL, data);
6966
6967                 data = RREG32(DB_RENDER_CONTROL);
6968         }
6969 }
6970
6971 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6972 {
6973         u32 mask = 0, tmp, tmp1;
6974         int i;
6975
6976         mutex_lock(&rdev->grbm_idx_mutex);
6977         cik_select_se_sh(rdev, se, sh);
6978         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6979         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6980         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6981         mutex_unlock(&rdev->grbm_idx_mutex);
6982
6983         tmp &= 0xffff0000;
6984
6985         tmp |= tmp1;
6986         tmp >>= 16;
6987
6988         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6989                 mask <<= 1;
6990                 mask |= 1;
6991         }
6992
6993         return (~tmp) & mask;
6994 }
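/*
 * Worked example (illustrative): with max_cu_per_sh = 8 the mask built
 * above is 0xff.  The shader array config registers carry the inactive-CU
 * bits in their upper 16 bits, so after the shift tmp holds the disabled
 * CUs; if tmp == 0x03 (CU0 and CU1 fused off), the function returns
 * ~0x03 & 0xff == 0xfc, i.e. CUs 2-7 are active.
 */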
6995
6996 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6997 {
6998         u32 i, j, k, active_cu_number = 0;
6999         u32 mask, counter, cu_bitmap;
7000         u32 tmp = 0;
7001
7002         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
7003                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
7004                         mask = 1;
7005                         cu_bitmap = 0;
7006                         counter = 0;
7007                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
7008                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
7009                                         if (counter < 2)
7010                                                 cu_bitmap |= mask;
7011                                         counter ++;
7012                                 }
7013                                 mask <<= 1;
7014                         }
7015
7016                         active_cu_number += counter;
7017                         tmp |= (cu_bitmap << (i * 16 + j * 8));
7018                 }
7019         }
7020
7021         WREG32(RLC_PG_AO_CU_MASK, tmp);
7022
7023         tmp = RREG32(RLC_MAX_PG_CU);
7024         tmp &= ~MAX_PU_CU_MASK;
7025         tmp |= MAX_PU_CU(active_cu_number);
7026         WREG32(RLC_MAX_PG_CU, tmp);
7027 }
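/*
 * Worked example (illustrative): the shift (i * 16 + j * 8) packs one byte
 * per shader array into RLC_PG_AO_CU_MASK - SE0/SH0 lands in bits 0-7,
 * SE0/SH1 in bits 8-15, SE1/SH0 in bits 16-23, and so on - and at most two
 * CUs per SH are marked "always on" because of the counter < 2 check.
 */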
7028
7029 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
7030                                        bool enable)
7031 {
7032         u32 data, orig;
7033
7034         orig = data = RREG32(RLC_PG_CNTL);
7035         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
7036                 data |= STATIC_PER_CU_PG_ENABLE;
7037         else
7038                 data &= ~STATIC_PER_CU_PG_ENABLE;
7039         if (orig != data)
7040                 WREG32(RLC_PG_CNTL, data);
7041 }
7042
7043 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
7044                                         bool enable)
7045 {
7046         u32 data, orig;
7047
7048         orig = data = RREG32(RLC_PG_CNTL);
7049         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
7050                 data |= DYN_PER_CU_PG_ENABLE;
7051         else
7052                 data &= ~DYN_PER_CU_PG_ENABLE;
7053         if (orig != data)
7054                 WREG32(RLC_PG_CNTL, data);
7055 }
7056
7057 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
7058 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
7059
7060 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
7061 {
7062         u32 data, orig;
7063         u32 i;
7064
7065         if (rdev->rlc.cs_data) {
7066                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
7067                 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
7068                 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
7069                 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
7070         } else {
7071                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
7072                 for (i = 0; i < 3; i++)
7073                         WREG32(RLC_GPM_SCRATCH_DATA, 0);
7074         }
7075         if (rdev->rlc.reg_list) {
7076                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
7077                 for (i = 0; i < rdev->rlc.reg_list_size; i++)
7078                         WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
7079         }
7080
7081         orig = data = RREG32(RLC_PG_CNTL);
7082         data |= GFX_PG_SRC;
7083         if (orig != data)
7084                 WREG32(RLC_PG_CNTL, data);
7085
7086         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
7087         WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
7088
7089         data = RREG32(CP_RB_WPTR_POLL_CNTL);
7090         data &= ~IDLE_POLL_COUNT_MASK;
7091         data |= IDLE_POLL_COUNT(0x60);
7092         WREG32(CP_RB_WPTR_POLL_CNTL, data);
7093
7094         data = 0x10101010;
7095         WREG32(RLC_PG_DELAY, data);
7096
7097         data = RREG32(RLC_PG_DELAY_2);
7098         data &= ~0xff;
7099         data |= 0x3;
7100         WREG32(RLC_PG_DELAY_2, data);
7101
7102         data = RREG32(RLC_AUTO_PG_CTRL);
7103         data &= ~GRBM_REG_SGIT_MASK;
7104         data |= GRBM_REG_SGIT(0x700);
7105         WREG32(RLC_AUTO_PG_CTRL, data);
7106
7107 }
7108
7109 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
7110 {
7111         cik_enable_gfx_cgpg(rdev, enable);
7112         cik_enable_gfx_static_mgpg(rdev, enable);
7113         cik_enable_gfx_dynamic_mgpg(rdev, enable);
7114 }
7115
7116 u32 cik_get_csb_size(struct radeon_device *rdev)
7117 {
7118         u32 count = 0;
7119         const struct cs_section_def *sect = NULL;
7120         const struct cs_extent_def *ext = NULL;
7121
7122         if (rdev->rlc.cs_data == NULL)
7123                 return 0;
7124
7125         /* begin clear state */
7126         count += 2;
7127         /* context control state */
7128         count += 3;
7129
7130         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7131                 for (ext = sect->section; ext->extent != NULL; ++ext) {
7132                         if (sect->id == SECT_CONTEXT)
7133                                 count += 2 + ext->reg_count;
7134                         else
7135                                 return 0;
7136                 }
7137         }
7138         /* pa_sc_raster_config/pa_sc_raster_config1 */
7139         count += 4;
7140         /* end clear state */
7141         count += 2;
7142         /* clear state */
7143         count += 2;
7144
7145         return count;
7146 }
7147
7148 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
7149 {
7150         u32 count = 0, i;
7151         const struct cs_section_def *sect = NULL;
7152         const struct cs_extent_def *ext = NULL;
7153
7154         if (rdev->rlc.cs_data == NULL)
7155                 return;
7156         if (buffer == NULL)
7157                 return;
7158
7159         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7160         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
7161
7162         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
7163         buffer[count++] = cpu_to_le32(0x80000000);
7164         buffer[count++] = cpu_to_le32(0x80000000);
7165
7166         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7167                 for (ext = sect->section; ext->extent != NULL; ++ext) {
7168                         if (sect->id == SECT_CONTEXT) {
7169                                 buffer[count++] =
7170                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
7171                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
7172                                 for (i = 0; i < ext->reg_count; i++)
7173                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
7174                         } else {
7175                                 return;
7176                         }
7177                 }
7178         }
7179
7180         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
7181         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
7182         switch (rdev->family) {
7183         case CHIP_BONAIRE:
7184                 buffer[count++] = cpu_to_le32(0x16000012);
7185                 buffer[count++] = cpu_to_le32(0x00000000);
7186                 break;
7187         case CHIP_KAVERI:
7188                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7189                 buffer[count++] = cpu_to_le32(0x00000000);
7190                 break;
7191         case CHIP_KABINI:
7192         case CHIP_MULLINS:
7193                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7194                 buffer[count++] = cpu_to_le32(0x00000000);
7195                 break;
7196         case CHIP_HAWAII:
7197                 buffer[count++] = cpu_to_le32(0x3a00161a);
7198                 buffer[count++] = cpu_to_le32(0x0000002e);
7199                 break;
7200         default:
7201                 buffer[count++] = cpu_to_le32(0x00000000);
7202                 buffer[count++] = cpu_to_le32(0x00000000);
7203                 break;
7204         }
7205
7206         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7207         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
7208
7209         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
7210         buffer[count++] = cpu_to_le32(0);
7211 }
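/*
 * Illustrative sketch (not driver code): cik_get_csb_size() reports how
 * many dwords cik_get_csb_buffer() will emit, so a clear-state buffer can
 * be sized before it is filled.  Variable names below are assumptions.
 */
#if 0
u32 dws = cik_get_csb_size(rdev);               /* dwords to allocate */
volatile u32 *csb = clear_state_cpu_ptr;        /* hypothetical kmap of the BO */
cik_get_csb_buffer(rdev, csb);                  /* fills exactly dws dwords */
#endif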
7212
7213 static void cik_init_pg(struct radeon_device *rdev)
7214 {
7215         if (rdev->pg_flags) {
7216                 cik_enable_sck_slowdown_on_pu(rdev, true);
7217                 cik_enable_sck_slowdown_on_pd(rdev, true);
7218                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7219                         cik_init_gfx_cgpg(rdev);
7220                         cik_enable_cp_pg(rdev, true);
7221                         cik_enable_gds_pg(rdev, true);
7222                 }
7223                 cik_init_ao_cu_mask(rdev);
7224                 cik_update_gfx_pg(rdev, true);
7225         }
7226 }
7227
7228 static void cik_fini_pg(struct radeon_device *rdev)
7229 {
7230         if (rdev->pg_flags) {
7231                 cik_update_gfx_pg(rdev, false);
7232                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7233                         cik_enable_cp_pg(rdev, false);
7234                         cik_enable_gds_pg(rdev, false);
7235                 }
7236         }
7237 }
7238
7239 /*
7240  * Interrupts
7241  * Starting with r6xx, interrupts are handled via a ring buffer.
7242  * Ring buffers are areas of GPU accessible memory that the GPU
7243  * writes interrupt vectors into and the host reads vectors out of.
7244  * There is a rptr (read pointer) that determines where the
7245  * host is currently reading, and a wptr (write pointer)
7246  * which determines where the GPU has written.  When the
7247  * pointers are equal, the ring is idle.  When the GPU
7248  * writes vectors to the ring buffer, it increments the
7249  * wptr.  When there is an interrupt, the host then starts
7250  * fetching commands and processing them until the pointers are
7251  * equal again at which point it updates the rptr.
7252  */
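/*
 * Illustrative sketch (not driver code): each vector the IH writes is 16
 * bytes (four dwords), so the consumption loop described above boils down
 * to something like the following, with rptr wrapped by the ring mask:
 */
#if 0
while (rptr != wptr) {
        u32 ring_index = rptr / 4;
        u32 src_id   = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
        u32 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
        /* ... dispatch on src_id/src_data ... */
        rptr = (rptr + 16) & rdev->ih.ptr_mask;
}
WREG32(IH_RB_RPTR, rptr);
#endif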
7253
7254 /**
7255  * cik_enable_interrupts - Enable the interrupt ring buffer
7256  *
7257  * @rdev: radeon_device pointer
7258  *
7259  * Enable the interrupt ring buffer (CIK).
7260  */
7261 static void cik_enable_interrupts(struct radeon_device *rdev)
7262 {
7263         u32 ih_cntl = RREG32(IH_CNTL);
7264         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7265
7266         ih_cntl |= ENABLE_INTR;
7267         ih_rb_cntl |= IH_RB_ENABLE;
7268         WREG32(IH_CNTL, ih_cntl);
7269         WREG32(IH_RB_CNTL, ih_rb_cntl);
7270         rdev->ih.enabled = true;
7271 }
7272
7273 /**
7274  * cik_disable_interrupts - Disable the interrupt ring buffer
7275  *
7276  * @rdev: radeon_device pointer
7277  *
7278  * Disable the interrupt ring buffer (CIK).
7279  */
7280 static void cik_disable_interrupts(struct radeon_device *rdev)
7281 {
7282         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7283         u32 ih_cntl = RREG32(IH_CNTL);
7284
7285         ih_rb_cntl &= ~IH_RB_ENABLE;
7286         ih_cntl &= ~ENABLE_INTR;
7287         WREG32(IH_RB_CNTL, ih_rb_cntl);
7288         WREG32(IH_CNTL, ih_cntl);
7289         /* set rptr, wptr to 0 */
7290         WREG32(IH_RB_RPTR, 0);
7291         WREG32(IH_RB_WPTR, 0);
7292         rdev->ih.enabled = false;
7293         rdev->ih.rptr = 0;
7294 }
7295
7296 /**
7297  * cik_disable_interrupt_state - Disable all interrupt sources
7298  *
7299  * @rdev: radeon_device pointer
7300  *
7301  * Clear all interrupt enable bits used by the driver (CIK).
7302  */
7303 static void cik_disable_interrupt_state(struct radeon_device *rdev)
7304 {
7305         u32 tmp;
7306
7307         /* gfx ring */
7308         tmp = RREG32(CP_INT_CNTL_RING0) &
7309                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7310         WREG32(CP_INT_CNTL_RING0, tmp);
7311         /* sdma */
7312         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7313         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
7314         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7315         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
7316         /* compute queues */
7317         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
7318         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
7319         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
7320         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
7321         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
7322         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
7323         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
7324         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
7325         /* grbm */
7326         WREG32(GRBM_INT_CNTL, 0);
7327         /* SRBM */
7328         WREG32(SRBM_INT_CNTL, 0);
7329         /* vline/vblank, etc. */
7330         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7331         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7332         if (rdev->num_crtc >= 4) {
7333                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7334                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7335         }
7336         if (rdev->num_crtc >= 6) {
7337                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7338                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7339         }
7340         /* pflip */
7341         if (rdev->num_crtc >= 2) {
7342                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7343                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7344         }
7345         if (rdev->num_crtc >= 4) {
7346                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7347                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7348         }
7349         if (rdev->num_crtc >= 6) {
7350                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7351                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7352         }
7353
7354         /* dac hotplug */
7355         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7356
7357         /* digital hotplug */
7358         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7359         WREG32(DC_HPD1_INT_CONTROL, tmp);
7360         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7361         WREG32(DC_HPD2_INT_CONTROL, tmp);
7362         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7363         WREG32(DC_HPD3_INT_CONTROL, tmp);
7364         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7365         WREG32(DC_HPD4_INT_CONTROL, tmp);
7366         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7367         WREG32(DC_HPD5_INT_CONTROL, tmp);
7368         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7369         WREG32(DC_HPD6_INT_CONTROL, tmp);
7370
7371 }
7372
7373 /**
7374  * cik_irq_init - init and enable the interrupt ring
7375  *
7376  * @rdev: radeon_device pointer
7377  *
7378  * Allocate a ring buffer for the interrupt controller,
7379  * enable the RLC, disable interrupts, enable the IH
7380  * ring buffer and enable it (CIK).
7381  * Called at device load and resume.
7382  * Returns 0 for success, errors for failure.
7383  */
7384 static int cik_irq_init(struct radeon_device *rdev)
7385 {
7386         int ret = 0;
7387         int rb_bufsz;
7388         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7389
7390         /* allocate ring */
7391         ret = r600_ih_ring_alloc(rdev);
7392         if (ret)
7393                 return ret;
7394
7395         /* disable irqs */
7396         cik_disable_interrupts(rdev);
7397
7398         /* init rlc */
7399         ret = cik_rlc_resume(rdev);
7400         if (ret) {
7401                 r600_ih_ring_fini(rdev);
7402                 return ret;
7403         }
7404
7405         /* setup interrupt control */
7406         /* XXX this should actually be a bus address, not an MC address. same on older asics */
7407         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7408         interrupt_cntl = RREG32(INTERRUPT_CNTL);
7409         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7410          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7411          */
7412         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7413         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7414         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7415         WREG32(INTERRUPT_CNTL, interrupt_cntl);
7416
7417         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7418         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7419
7420         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7421                       IH_WPTR_OVERFLOW_CLEAR |
7422                       (rb_bufsz << 1));
7423
7424         if (rdev->wb.enabled)
7425                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7426
7427         /* set the writeback address whether it's enabled or not */
7428         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7429         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7430
7431         WREG32(IH_RB_CNTL, ih_rb_cntl);
7432
7433         /* set rptr, wptr to 0 */
7434         WREG32(IH_RB_RPTR, 0);
7435         WREG32(IH_RB_WPTR, 0);
7436
7437         /* Default settings for IH_CNTL (disabled at first) */
7438         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7439         /* RPTR_REARM only works if msi's are enabled */
7440         if (rdev->msi_enabled)
7441                 ih_cntl |= RPTR_REARM;
7442         WREG32(IH_CNTL, ih_cntl);
7443
7444         /* force the active interrupt state to all disabled */
7445         cik_disable_interrupt_state(rdev);
7446
7447         pci_set_master(rdev->pdev);
7448
7449         /* enable irqs */
7450         cik_enable_interrupts(rdev);
7451
7452         return ret;
7453 }
7454
7455 /**
7456  * cik_irq_set - enable/disable interrupt sources
7457  *
7458  * @rdev: radeon_device pointer
7459  *
7460  * Enable interrupt sources on the GPU (vblanks, hpd,
7461  * etc.) (CIK).
7462  * Returns 0 for success, errors for failure.
7463  */
7464 int cik_irq_set(struct radeon_device *rdev)
7465 {
7466         u32 cp_int_cntl;
7467         u32 cp_m1p0;
7468         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7469         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7470         u32 grbm_int_cntl = 0;
7471         u32 dma_cntl, dma_cntl1;
7472
7473         if (!rdev->irq.installed) {
7474                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7475                 return -EINVAL;
7476         }
7477         /* don't enable anything if the ih is disabled */
7478         if (!rdev->ih.enabled) {
7479                 cik_disable_interrupts(rdev);
7480                 /* force the active interrupt state to all disabled */
7481                 cik_disable_interrupt_state(rdev);
7482                 return 0;
7483         }
7484
7485         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7486                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7487         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7488
7489         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7490         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7491         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7492         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7493         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7494         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7495
7496         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7497         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7498
7499         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7500
7501         /* enable CP interrupts on all rings */
7502         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7503                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7504                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7505         }
7506         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7507                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7508                 DRM_DEBUG("cik_irq_set: sw int cp1\n");
7509                 if (ring->me == 1) {
7510                         switch (ring->pipe) {
7511                         case 0:
7512                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7513                                 break;
7514                         default:
7515                                 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7516                                 break;
7517                         }
7518                 } else {
7519                         DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
7520                 }
7521         }
7522         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7523                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7524                 DRM_DEBUG("cik_irq_set: sw int cp2\n");
7525                 if (ring->me == 1) {
7526                         switch (ring->pipe) {
7527                         case 0:
7528                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7529                                 break;
7530                         default:
7531                                 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7532                                 break;
7533                         }
7534                 } else {
7535                         DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
7536                 }
7537         }
7538
7539         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7540                 DRM_DEBUG("cik_irq_set: sw int dma\n");
7541                 dma_cntl |= TRAP_ENABLE;
7542         }
7543
7544         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7545                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7546                 dma_cntl1 |= TRAP_ENABLE;
7547         }
7548
7549         if (rdev->irq.crtc_vblank_int[0] ||
7550             atomic_read(&rdev->irq.pflip[0])) {
7551                 DRM_DEBUG("cik_irq_set: vblank 0\n");
7552                 crtc1 |= VBLANK_INTERRUPT_MASK;
7553         }
7554         if (rdev->irq.crtc_vblank_int[1] ||
7555             atomic_read(&rdev->irq.pflip[1])) {
7556                 DRM_DEBUG("cik_irq_set: vblank 1\n");
7557                 crtc2 |= VBLANK_INTERRUPT_MASK;
7558         }
7559         if (rdev->irq.crtc_vblank_int[2] ||
7560             atomic_read(&rdev->irq.pflip[2])) {
7561                 DRM_DEBUG("cik_irq_set: vblank 2\n");
7562                 crtc3 |= VBLANK_INTERRUPT_MASK;
7563         }
7564         if (rdev->irq.crtc_vblank_int[3] ||
7565             atomic_read(&rdev->irq.pflip[3])) {
7566                 DRM_DEBUG("cik_irq_set: vblank 3\n");
7567                 crtc4 |= VBLANK_INTERRUPT_MASK;
7568         }
7569         if (rdev->irq.crtc_vblank_int[4] ||
7570             atomic_read(&rdev->irq.pflip[4])) {
7571                 DRM_DEBUG("cik_irq_set: vblank 4\n");
7572                 crtc5 |= VBLANK_INTERRUPT_MASK;
7573         }
7574         if (rdev->irq.crtc_vblank_int[5] ||
7575             atomic_read(&rdev->irq.pflip[5])) {
7576                 DRM_DEBUG("cik_irq_set: vblank 5\n");
7577                 crtc6 |= VBLANK_INTERRUPT_MASK;
7578         }
7579         if (rdev->irq.hpd[0]) {
7580                 DRM_DEBUG("cik_irq_set: hpd 1\n");
7581                 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7582         }
7583         if (rdev->irq.hpd[1]) {
7584                 DRM_DEBUG("cik_irq_set: hpd 2\n");
7585                 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7586         }
7587         if (rdev->irq.hpd[2]) {
7588                 DRM_DEBUG("cik_irq_set: hpd 3\n");
7589                 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7590         }
7591         if (rdev->irq.hpd[3]) {
7592                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7593                 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7594         }
7595         if (rdev->irq.hpd[4]) {
7596                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7597                 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7598         }
7599         if (rdev->irq.hpd[5]) {
7600                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7601                 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7602         }
7603
7604         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7605
7606         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7607         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7608
7609         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7610
7611         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7612
7613         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7614         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7615         if (rdev->num_crtc >= 4) {
7616                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7617                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7618         }
7619         if (rdev->num_crtc >= 6) {
7620                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7621                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7622         }
7623
7624         if (rdev->num_crtc >= 2) {
7625                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7626                        GRPH_PFLIP_INT_MASK);
7627                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7628                        GRPH_PFLIP_INT_MASK);
7629         }
7630         if (rdev->num_crtc >= 4) {
7631                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7632                        GRPH_PFLIP_INT_MASK);
7633                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7634                        GRPH_PFLIP_INT_MASK);
7635         }
7636         if (rdev->num_crtc >= 6) {
7637                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7638                        GRPH_PFLIP_INT_MASK);
7639                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7640                        GRPH_PFLIP_INT_MASK);
7641         }
7642
7643         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7644         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7645         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7646         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7647         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7648         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7649
7650         /* posting read */
7651         RREG32(SRBM_STATUS);
7652
7653         return 0;
7654 }
7655
7656 /**
7657  * cik_irq_ack - ack interrupt sources
7658  *
7659  * @rdev: radeon_device pointer
7660  *
7661  * Ack interrupt sources on the GPU (vblanks, hpd,
7662  * etc.) (CIK).  Certain interrupt sources are sw
7663  * generated and do not require an explicit ack.
7664  */
7665 static inline void cik_irq_ack(struct radeon_device *rdev)
7666 {
7667         u32 tmp;
7668
7669         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7670         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7671         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7672         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7673         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7674         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7675         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7676
7677         rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7678                 EVERGREEN_CRTC0_REGISTER_OFFSET);
7679         rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7680                 EVERGREEN_CRTC1_REGISTER_OFFSET);
7681         if (rdev->num_crtc >= 4) {
7682                 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7683                         EVERGREEN_CRTC2_REGISTER_OFFSET);
7684                 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7685                         EVERGREEN_CRTC3_REGISTER_OFFSET);
7686         }
7687         if (rdev->num_crtc >= 6) {
7688                 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7689                         EVERGREEN_CRTC4_REGISTER_OFFSET);
7690                 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7691                         EVERGREEN_CRTC5_REGISTER_OFFSET);
7692         }
7693
7694         if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7695                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7696                        GRPH_PFLIP_INT_CLEAR);
7697         if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7698                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7699                        GRPH_PFLIP_INT_CLEAR);
7700         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7701                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7702         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7703                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7704         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7705                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7706         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7707                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7708
7709         if (rdev->num_crtc >= 4) {
7710                 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7711                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7712                                GRPH_PFLIP_INT_CLEAR);
7713                 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7714                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7715                                GRPH_PFLIP_INT_CLEAR);
7716                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7717                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7718                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7719                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7720                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7721                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7722                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7723                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7724         }
7725
7726         if (rdev->num_crtc >= 6) {
7727                 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7728                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7729                                GRPH_PFLIP_INT_CLEAR);
7730                 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7731                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7732                                GRPH_PFLIP_INT_CLEAR);
7733                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7734                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7735                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7736                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7737                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7738                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7739                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7740                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7741         }
7742
7743         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7744                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7745                 tmp |= DC_HPDx_INT_ACK;
7746                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7747         }
7748         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7749                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7750                 tmp |= DC_HPDx_INT_ACK;
7751                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7752         }
7753         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7754                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7755                 tmp |= DC_HPDx_INT_ACK;
7756                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7757         }
7758         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7759                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7760                 tmp |= DC_HPDx_INT_ACK;
7761                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7762         }
7763         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7764                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7765                 tmp |= DC_HPDx_INT_ACK;
7766                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7767         }
7768         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7769                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7770                 tmp |= DC_HPDx_INT_ACK;
7771                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7772         }
7773         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7774                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7775                 tmp |= DC_HPDx_RX_INT_ACK;
7776                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7777         }
7778         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7779                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7780                 tmp |= DC_HPDx_RX_INT_ACK;
7781                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7782         }
7783         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7784                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7785                 tmp |= DC_HPDx_RX_INT_ACK;
7786                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7787         }
7788         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7789                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7790                 tmp |= DC_HPDx_RX_INT_ACK;
7791                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7792         }
7793         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7794                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7795                 tmp |= DC_HPDx_RX_INT_ACK;
7796                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7797         }
7798         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7799                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7800                 tmp |= DC_HPDx_RX_INT_ACK;
7801                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7802         }
7803 }
7804
7805 /**
7806  * cik_irq_disable - disable interrupts
7807  *
7808  * @rdev: radeon_device pointer
7809  *
7810  * Disable interrupts on the hw (CIK).
7811  */
7812 static void cik_irq_disable(struct radeon_device *rdev)
7813 {
7814         cik_disable_interrupts(rdev);
7815         /* Wait and acknowledge irq */
7816         mdelay(1);
7817         cik_irq_ack(rdev);
7818         cik_disable_interrupt_state(rdev);
7819 }
7820
7821 /**
7822  * cik_irq_suspend - disable interrupts for suspend
7823  *
7824  * @rdev: radeon_device pointer
7825  *
7826  * Disable interrupts and stop the RLC (CIK).
7827  * Used for suspend.
7828  */
7829 static void cik_irq_suspend(struct radeon_device *rdev)
7830 {
7831         cik_irq_disable(rdev);
7832         cik_rlc_stop(rdev);
7833 }
7834
7835 /**
7836  * cik_irq_fini - tear down interrupt support
7837  *
7838  * @rdev: radeon_device pointer
7839  *
7840  * Disable interrupts on the hw and free the IH ring
7841  * buffer (CIK).
7842  * Used for driver unload.
7843  */
7844 static void cik_irq_fini(struct radeon_device *rdev)
7845 {
7846         cik_irq_suspend(rdev);
7847         r600_ih_ring_fini(rdev);
7848 }
7849
7850 /**
7851  * cik_get_ih_wptr - get the IH ring buffer wptr
7852  *
7853  * @rdev: radeon_device pointer
7854  *
7855  * Get the IH ring buffer wptr from either the register
7856  * or the writeback memory buffer (CIK).  Also check for
7857  * ring buffer overflow and deal with it.
7858  * Used by cik_irq_process().
7859  * Returns the value of the wptr.
7860  */
7861 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7862 {
7863         u32 wptr, tmp;
7864
7865         if (rdev->wb.enabled)
7866                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7867         else
7868                 wptr = RREG32(IH_RB_WPTR);
7869
7870         if (wptr & RB_OVERFLOW) {
7871                 wptr &= ~RB_OVERFLOW;
7872                 /* When a ring buffer overflow happens, start parsing interrupts
7873                  * from the last non-overwritten vector (wptr + 16). Hopefully
7874                  * this should allow us to catch up.
7875                  */
7876                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7877                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7878                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7879                 tmp = RREG32(IH_RB_CNTL);
7880                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7881                 WREG32(IH_RB_CNTL, tmp);
7882         }
7883         return (wptr & rdev->ih.ptr_mask);
7884 }
7885
7886 /*        CIK IV Ring
7887  * Each IV ring entry is 128 bits:
7888  * [7:0]    - interrupt source id
7889  * [31:8]   - reserved
7890  * [59:32]  - interrupt source data
7891  * [63:60]  - reserved
7892  * [71:64]  - RINGID
7893  *            CP:
7894  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7895  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7896  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7897  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7898  *            PIPE_ID - ME0 0=3D
7899  *                    - ME1&2 compute dispatcher (4 pipes each)
7900  *            SDMA:
7901  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7902  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7903  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7904  * [79:72]  - VMID
7905  * [95:80]  - PASID
7906  * [127:96] - reserved
7907  */
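/* Illustrative sketch only (it mirrors what cik_irq_process() below already
 * does, it is not additional driver logic): one IV entry is consumed as four
 * dwords starting at ring_index, and for CP/SDMA sources the RINGID byte is
 * further split per the layout above:
 *
 *   src_id   = le32_to_cpu(rdev->ih.ring[ring_index + 0]) & 0xff;
 *   src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
 *   ring_id  = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
 *
 *   CP sources:   me_id    = (ring_id & 0x60) >> 5;
 *                 pipe_id  = (ring_id & 0x18) >> 3;
 *                 queue_id = (ring_id & 0x7) >> 0;
 *   SDMA sources: me_id    = (ring_id & 0x3) >> 0;   (INSTANCE_ID)
 *                 queue_id = (ring_id & 0xc) >> 2;
 */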
7908 /**
7909  * cik_irq_process - interrupt handler
7910  *
7911  * @rdev: radeon_device pointer
7912  *
7913  * Interrupt handler (CIK).  Walk the IH ring,
7914  * ack interrupts and schedule work to handle
7915  * interrupt events.
7916  * Returns irq process return code.
7917  */
7918 int cik_irq_process(struct radeon_device *rdev)
7919 {
7920         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7921         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7922         u32 wptr;
7923         u32 rptr;
7924         u32 src_id, src_data, ring_id;
7925         u8 me_id, pipe_id, queue_id;
7926         u32 ring_index;
7927         bool queue_hotplug = false;
7928         bool queue_dp = false;
7929         bool queue_reset = false;
7930         u32 addr, status, mc_client;
7931         bool queue_thermal = false;
7932
7933         if (!rdev->ih.enabled || rdev->shutdown)
7934                 return IRQ_NONE;
7935
7936         wptr = cik_get_ih_wptr(rdev);
7937
7938 restart_ih:
7939         /* is somebody else already processing irqs? */
7940         if (atomic_xchg(&rdev->ih.lock, 1))
7941                 return IRQ_NONE;
7942
7943         rptr = rdev->ih.rptr;
7944         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7945
7946         /* Order reading of wptr vs. reading of IH ring data */
7947         rmb();
7948
7949         /* display interrupts */
7950         cik_irq_ack(rdev);
7951
7952         while (rptr != wptr) {
7953                 /* wptr/rptr are in bytes! */
7954                 ring_index = rptr / 4;
7955
7956                 radeon_kfd_interrupt(rdev,
7957                                 (const void *) &rdev->ih.ring[ring_index]);
7958
7959                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7960                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7961                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7962
7963                 switch (src_id) {
7964                 case 1: /* D1 vblank/vline */
7965                         switch (src_data) {
7966                         case 0: /* D1 vblank */
7967                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7968                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7969
7970                                 if (rdev->irq.crtc_vblank_int[0]) {
7971                                         drm_handle_vblank(rdev->ddev, 0);
7972                                         rdev->pm.vblank_sync = true;
7973                                         wake_up(&rdev->irq.vblank_queue);
7974                                 }
7975                                 if (atomic_read(&rdev->irq.pflip[0]))
7976                                         radeon_crtc_handle_vblank(rdev, 0);
7977                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7978                                 DRM_DEBUG("IH: D1 vblank\n");
7979
7980                                 break;
7981                         case 1: /* D1 vline */
7982                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7983                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7984
7985                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7986                                 DRM_DEBUG("IH: D1 vline\n");
7987
7988                                 break;
7989                         default:
7990                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7991                                 break;
7992                         }
7993                         break;
7994                 case 2: /* D2 vblank/vline */
7995                         switch (src_data) {
7996                         case 0: /* D2 vblank */
7997                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7998                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7999
8000                                 if (rdev->irq.crtc_vblank_int[1]) {
8001                                         drm_handle_vblank(rdev->ddev, 1);
8002                                         rdev->pm.vblank_sync = true;
8003                                         wake_up(&rdev->irq.vblank_queue);
8004                                 }
8005                                 if (atomic_read(&rdev->irq.pflip[1]))
8006                                         radeon_crtc_handle_vblank(rdev, 1);
8007                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
8008                                 DRM_DEBUG("IH: D2 vblank\n");
8009
8010                                 break;
8011                         case 1: /* D2 vline */
8012                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
8013                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8014
8015                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
8016                                 DRM_DEBUG("IH: D2 vline\n");
8017
8018                                 break;
8019                         default:
8020                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8021                                 break;
8022                         }
8023                         break;
8024                 case 3: /* D3 vblank/vline */
8025                         switch (src_data) {
8026                         case 0: /* D3 vblank */
8027                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
8028                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8029
8030                                 if (rdev->irq.crtc_vblank_int[2]) {
8031                                         drm_handle_vblank(rdev->ddev, 2);
8032                                         rdev->pm.vblank_sync = true;
8033                                         wake_up(&rdev->irq.vblank_queue);
8034                                 }
8035                                 if (atomic_read(&rdev->irq.pflip[2]))
8036                                         radeon_crtc_handle_vblank(rdev, 2);
8037                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
8038                                 DRM_DEBUG("IH: D3 vblank\n");
8039
8040                                 break;
8041                         case 1: /* D3 vline */
8042                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
8043                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8044
8045                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
8046                                 DRM_DEBUG("IH: D3 vline\n");
8047
8048                                 break;
8049                         default:
8050                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8051                                 break;
8052                         }
8053                         break;
8054                 case 4: /* D4 vblank/vline */
8055                         switch (src_data) {
8056                         case 0: /* D4 vblank */
8057                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
8058                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8059
8060                                 if (rdev->irq.crtc_vblank_int[3]) {
8061                                         drm_handle_vblank(rdev->ddev, 3);
8062                                         rdev->pm.vblank_sync = true;
8063                                         wake_up(&rdev->irq.vblank_queue);
8064                                 }
8065                                 if (atomic_read(&rdev->irq.pflip[3]))
8066                                         radeon_crtc_handle_vblank(rdev, 3);
8067                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
8068                                 DRM_DEBUG("IH: D4 vblank\n");
8069
8070                                 break;
8071                         case 1: /* D4 vline */
8072                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
8073                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8074
8075                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
8076                                 DRM_DEBUG("IH: D4 vline\n");
8077
8078                                 break;
8079                         default:
8080                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8081                                 break;
8082                         }
8083                         break;
8084                 case 5: /* D5 vblank/vline */
8085                         switch (src_data) {
8086                         case 0: /* D5 vblank */
8087                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
8088                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8089
8090                                 if (rdev->irq.crtc_vblank_int[4]) {
8091                                         drm_handle_vblank(rdev->ddev, 4);
8092                                         rdev->pm.vblank_sync = true;
8093                                         wake_up(&rdev->irq.vblank_queue);
8094                                 }
8095                                 if (atomic_read(&rdev->irq.pflip[4]))
8096                                         radeon_crtc_handle_vblank(rdev, 4);
8097                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
8098                                 DRM_DEBUG("IH: D5 vblank\n");
8099
8100                                 break;
8101                         case 1: /* D5 vline */
8102                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
8103                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8104
8105                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
8106                                 DRM_DEBUG("IH: D5 vline\n");
8107
8108                                 break;
8109                         default:
8110                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8111                                 break;
8112                         }
8113                         break;
8114                 case 6: /* D6 vblank/vline */
8115                         switch (src_data) {
8116                         case 0: /* D6 vblank */
8117                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
8118                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8119
8120                                 if (rdev->irq.crtc_vblank_int[5]) {
8121                                         drm_handle_vblank(rdev->ddev, 5);
8122                                         rdev->pm.vblank_sync = true;
8123                                         wake_up(&rdev->irq.vblank_queue);
8124                                 }
8125                                 if (atomic_read(&rdev->irq.pflip[5]))
8126                                         radeon_crtc_handle_vblank(rdev, 5);
8127                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
8128                                 DRM_DEBUG("IH: D6 vblank\n");
8129
8130                                 break;
8131                         case 1: /* D6 vline */
8132                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
8133                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8134
8135                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
8136                                 DRM_DEBUG("IH: D6 vline\n");
8137
8138                                 break;
8139                         default:
8140                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8141                                 break;
8142                         }
8143                         break;
8144                 case 8: /* D1 page flip */
8145                 case 10: /* D2 page flip */
8146                 case 12: /* D3 page flip */
8147                 case 14: /* D4 page flip */
8148                 case 16: /* D5 page flip */
8149                 case 18: /* D6 page flip */
8150                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
8151                         if (radeon_use_pflipirq > 0)
8152                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
8153                         break;
8154                 case 42: /* HPD hotplug */
8155                         switch (src_data) {
8156                         case 0:
8157                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
8158                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8159
8160                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8161                                 queue_hotplug = true;
8162                                 DRM_DEBUG("IH: HPD1\n");
8163
8164                                 break;
8165                         case 1:
8166                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
8167                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8168
8169                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8170                                 queue_hotplug = true;
8171                                 DRM_DEBUG("IH: HPD2\n");
8172
8173                                 break;
8174                         case 2:
8175                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
8176                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8177
8178                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8179                                 queue_hotplug = true;
8180                                 DRM_DEBUG("IH: HPD3\n");
8181
8182                                 break;
8183                         case 3:
8184                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
8185                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8186
8187                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8188                                 queue_hotplug = true;
8189                                 DRM_DEBUG("IH: HPD4\n");
8190
8191                                 break;
8192                         case 4:
8193                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
8194                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8195
8196                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8197                                 queue_hotplug = true;
8198                                 DRM_DEBUG("IH: HPD5\n");
8199
8200                                 break;
8201                         case 5:
8202                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
8203                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8204
8205                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8206                                 queue_hotplug = true;
8207                                 DRM_DEBUG("IH: HPD6\n");
8208
8209                                 break;
8210                         case 6:
8211                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
8212                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8213
8214                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
8215                                 queue_dp = true;
8216                                 DRM_DEBUG("IH: HPD_RX 1\n");
8217
8218                                 break;
8219                         case 7:
8220                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
8221                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8222
8223                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
8224                                 queue_dp = true;
8225                                 DRM_DEBUG("IH: HPD_RX 2\n");
8226
8227                                 break;
8228                         case 8:
8229                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
8230                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8231
8232                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
8233                                 queue_dp = true;
8234                                 DRM_DEBUG("IH: HPD_RX 3\n");
8235
8236                                 break;
8237                         case 9:
8238                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
8239                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8240
8241                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
8242                                 queue_dp = true;
8243                                 DRM_DEBUG("IH: HPD_RX 4\n");
8244
8245                                 break;
8246                         case 10:
8247                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
8248                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8249
8250                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
8251                                 queue_dp = true;
8252                                 DRM_DEBUG("IH: HPD_RX 5\n");
8253
8254                                 break;
8255                         case 11:
8256                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
8257                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8258
8259                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
8260                                 queue_dp = true;
8261                                 DRM_DEBUG("IH: HPD_RX 6\n");
8262
8263                                 break;
8264                         default:
8265                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8266                                 break;
8267                         }
8268                         break;
8269                 case 96:
8270                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
8271                         WREG32(SRBM_INT_ACK, 0x1);
8272                         break;
8273                 case 124: /* UVD */
8274                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8275                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8276                         break;
8277                 case 146:
8278                 case 147:
8279                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8280                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8281                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8282                         /* reset addr and status */
8283                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8284                         if (addr == 0x0 && status == 0x0)
8285                                 break;
8286                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8287                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8288                                 addr);
8289                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8290                                 status);
8291                         cik_vm_decode_fault(rdev, status, addr, mc_client);
8292                         break;
8293                 case 167: /* VCE */
8294                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8295                         switch (src_data) {
8296                         case 0:
8297                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8298                                 break;
8299                         case 1:
8300                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8301                                 break;
8302                         default:
8303                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8304                                 break;
8305                         }
8306                         break;
8307                 case 176: /* GFX RB CP_INT */
8308                 case 177: /* GFX IB CP_INT */
8309                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8310                         break;
8311                 case 181: /* CP EOP event */
8312                         DRM_DEBUG("IH: CP EOP\n");
8313                         /* XXX check the bitfield order! */
8314                         me_id = (ring_id & 0x60) >> 5;
8315                         pipe_id = (ring_id & 0x18) >> 3;
8316                         queue_id = (ring_id & 0x7) >> 0;
8317                         switch (me_id) {
8318                         case 0:
8319                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8320                                 break;
8321                         case 1:
8322                         case 2:
8323                                 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
8324                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8325                                 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
8326                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8327                                 break;
8328                         }
8329                         break;
8330                 case 184: /* CP Privileged reg access */
8331                         DRM_ERROR("Illegal register access in command stream\n");
8332                         /* XXX check the bitfield order! */
8333                         me_id = (ring_id & 0x60) >> 5;
8334                         pipe_id = (ring_id & 0x18) >> 3;
8335                         queue_id = (ring_id & 0x7) >> 0;
8336                         switch (me_id) {
8337                         case 0:
8338                                 /* This results in a full GPU reset, but all we need to do is soft
8339                                  * reset the CP for gfx
8340                                  */
8341                                 queue_reset = true;
8342                                 break;
8343                         case 1:
8344                                 /* XXX compute */
8345                                 queue_reset = true;
8346                                 break;
8347                         case 2:
8348                                 /* XXX compute */
8349                                 queue_reset = true;
8350                                 break;
8351                         }
8352                         break;
8353                 case 185: /* CP Privileged inst */
8354                         DRM_ERROR("Illegal instruction in command stream\n");
8355                         /* XXX check the bitfield order! */
8356                         me_id = (ring_id & 0x60) >> 5;
8357                         pipe_id = (ring_id & 0x18) >> 3;
8358                         queue_id = (ring_id & 0x7) >> 0;
8359                         switch (me_id) {
8360                         case 0:
8361                                 /* This results in a full GPU reset, but all we need to do is soft
8362                                  * reset the CP for gfx
8363                                  */
8364                                 queue_reset = true;
8365                                 break;
8366                         case 1:
8367                                 /* XXX compute */
8368                                 queue_reset = true;
8369                                 break;
8370                         case 2:
8371                                 /* XXX compute */
8372                                 queue_reset = true;
8373                                 break;
8374                         }
8375                         break;
8376                 case 224: /* SDMA trap event */
8377                         /* XXX check the bitfield order! */
8378                         me_id = (ring_id & 0x3) >> 0;
8379                         queue_id = (ring_id & 0xc) >> 2;
8380                         DRM_DEBUG("IH: SDMA trap\n");
8381                         switch (me_id) {
8382                         case 0:
8383                                 switch (queue_id) {
8384                                 case 0:
8385                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8386                                         break;
8387                                 case 1:
8388                                         /* XXX compute */
8389                                         break;
8390                                 case 2:
8391                                         /* XXX compute */
8392                                         break;
8393                                 }
8394                                 break;
8395                         case 1:
8396                                 switch (queue_id) {
8397                                 case 0:
8398                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8399                                         break;
8400                                 case 1:
8401                                         /* XXX compute */
8402                                         break;
8403                                 case 2:
8404                                         /* XXX compute */
8405                                         break;
8406                                 }
8407                                 break;
8408                         }
8409                         break;
8410                 case 230: /* thermal low to high */
8411                         DRM_DEBUG("IH: thermal low to high\n");
8412                         rdev->pm.dpm.thermal.high_to_low = false;
8413                         queue_thermal = true;
8414                         break;
8415                 case 231: /* thermal high to low */
8416                         DRM_DEBUG("IH: thermal high to low\n");
8417                         rdev->pm.dpm.thermal.high_to_low = true;
8418                         queue_thermal = true;
8419                         break;
8420                 case 233: /* GUI IDLE */
8421                         DRM_DEBUG("IH: GUI idle\n");
8422                         break;
8423                 case 241: /* SDMA Privileged inst */
8424                 case 247: /* SDMA Privileged inst */
8425                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
8426                         /* XXX check the bitfield order! */
8427                         me_id = (ring_id & 0x3) >> 0;
8428                         queue_id = (ring_id & 0xc) >> 2;
8429                         switch (me_id) {
8430                         case 0:
8431                                 switch (queue_id) {
8432                                 case 0:
8433                                         queue_reset = true;
8434                                         break;
8435                                 case 1:
8436                                         /* XXX compute */
8437                                         queue_reset = true;
8438                                         break;
8439                                 case 2:
8440                                         /* XXX compute */
8441                                         queue_reset = true;
8442                                         break;
8443                                 }
8444                                 break;
8445                         case 1:
8446                                 switch (queue_id) {
8447                                 case 0:
8448                                         queue_reset = true;
8449                                         break;
8450                                 case 1:
8451                                         /* XXX compute */
8452                                         queue_reset = true;
8453                                         break;
8454                                 case 2:
8455                                         /* XXX compute */
8456                                         queue_reset = true;
8457                                         break;
8458                                 }
8459                                 break;
8460                         }
8461                         break;
8462                 default:
8463                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8464                         break;
8465                 }
8466
8467                 /* wptr/rptr are in bytes! */
8468                 rptr += 16;
8469                 rptr &= rdev->ih.ptr_mask;
8470                 WREG32(IH_RB_RPTR, rptr);
8471         }
8472         if (queue_dp)
8473                 schedule_work(&rdev->dp_work);
8474         if (queue_hotplug)
8475                 schedule_work(&rdev->hotplug_work);
8476         if (queue_reset) {
8477                 rdev->needs_reset = true;
8478                 wake_up_all(&rdev->fence_queue);
8479         }
8480         if (queue_thermal)
8481                 schedule_work(&rdev->pm.dpm.thermal.work);
8482         rdev->ih.rptr = rptr;
8483         atomic_set(&rdev->ih.lock, 0);
8484
8485         /* make sure wptr hasn't changed while processing */
8486         wptr = cik_get_ih_wptr(rdev);
8487         if (wptr != rptr)
8488                 goto restart_ih;
8489
8490         return IRQ_HANDLED;
8491 }
8492
8493 /*
8494  * startup/shutdown callbacks
8495  */
8496 /**
8497  * cik_startup - program the asic to a functional state
8498  *
8499  * @rdev: radeon_device pointer
8500  *
8501  * Programs the asic to a functional state (CIK).
8502  * Called by cik_init() and cik_resume().
8503  * Returns 0 for success, error for failure.
8504  */
8505 static int cik_startup(struct radeon_device *rdev)
8506 {
8507         struct radeon_ring *ring;
8508         u32 nop;
8509         int r;
8510
8511         /* enable pcie gen2/3 link */
8512         cik_pcie_gen3_enable(rdev);
8513         /* enable aspm */
8514         cik_program_aspm(rdev);
8515
8516         /* scratch needs to be initialized before MC */
8517         r = r600_vram_scratch_init(rdev);
8518         if (r)
8519                 return r;
8520
8521         cik_mc_program(rdev);
8522
8523         if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8524                 r = ci_mc_load_microcode(rdev);
8525                 if (r) {
8526                         DRM_ERROR("Failed to load MC firmware!\n");
8527                         return r;
8528                 }
8529         }
8530
8531         r = cik_pcie_gart_enable(rdev);
8532         if (r)
8533                 return r;
8534         cik_gpu_init(rdev);
8535
8536         /* allocate rlc buffers */
8537         if (rdev->flags & RADEON_IS_IGP) {
8538                 if (rdev->family == CHIP_KAVERI) {
8539                         rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8540                         rdev->rlc.reg_list_size =
8541                                 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8542                 } else {
8543                         rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8544                         rdev->rlc.reg_list_size =
8545                                 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8546                 }
8547         }
8548         rdev->rlc.cs_data = ci_cs_data;
8549         rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8550         r = sumo_rlc_init(rdev);
8551         if (r) {
8552                 DRM_ERROR("Failed to init rlc BOs!\n");
8553                 return r;
8554         }
8555
8556         /* allocate wb buffer */
8557         r = radeon_wb_init(rdev);
8558         if (r)
8559                 return r;
8560
8561         /* allocate mec buffers */
8562         r = cik_mec_init(rdev);
8563         if (r) {
8564                 DRM_ERROR("Failed to init MEC BOs!\n");
8565                 return r;
8566         }
8567
8568         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8569         if (r) {
8570                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8571                 return r;
8572         }
8573
8574         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8575         if (r) {
8576                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8577                 return r;
8578         }
8579
8580         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8581         if (r) {
8582                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8583                 return r;
8584         }
8585
8586         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8587         if (r) {
8588                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8589                 return r;
8590         }
8591
8592         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8593         if (r) {
8594                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8595                 return r;
8596         }
8597
8598         r = radeon_uvd_resume(rdev);
8599         if (!r) {
8600                 r = uvd_v4_2_resume(rdev);
8601                 if (!r) {
8602                         r = radeon_fence_driver_start_ring(rdev,
8603                                                            R600_RING_TYPE_UVD_INDEX);
8604                         if (r)
8605                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8606                 }
8607         }
8608         if (r)
8609                 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8610
8611         r = radeon_vce_resume(rdev);
8612         if (!r) {
8613                 r = vce_v2_0_resume(rdev);
8614                 if (!r)
8615                         r = radeon_fence_driver_start_ring(rdev,
8616                                                            TN_RING_TYPE_VCE1_INDEX);
8617                 if (!r)
8618                         r = radeon_fence_driver_start_ring(rdev,
8619                                                            TN_RING_TYPE_VCE2_INDEX);
8620         }
8621         if (r) {
8622                 dev_err(rdev->dev, "VCE init error (%d).\n", r);
8623                 rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8624                 rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8625         }
8626
8627         /* Enable IRQ */
8628         if (!rdev->irq.installed) {
8629                 r = radeon_irq_kms_init(rdev);
8630                 if (r)
8631                         return r;
8632         }
8633
8634         r = cik_irq_init(rdev);
8635         if (r) {
8636                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
8637                 radeon_irq_kms_fini(rdev);
8638                 return r;
8639         }
8640         cik_irq_set(rdev);
8641
8642         if (rdev->family == CHIP_HAWAII) {
8643                 if (rdev->new_fw)
8644                         nop = PACKET3(PACKET3_NOP, 0x3FFF);
8645                 else
8646                         nop = RADEON_CP_PACKET2;
8647         } else {
8648                 nop = PACKET3(PACKET3_NOP, 0x3FFF);
8649         }
8650
8651         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8652         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8653                              nop);
8654         if (r)
8655                 return r;
8656
8657         /* set up the compute queues */
8658         /* type-2 packets are deprecated on MEC, use type-3 instead */
8659         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8660         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8661                              nop);
8662         if (r)
8663                 return r;
8664         ring->me = 1; /* first MEC */
8665         ring->pipe = 0; /* first pipe */
8666         ring->queue = 0; /* first queue */
8667         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8668
8669         /* type-2 packets are deprecated on MEC, use type-3 instead */
8670         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8671         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8672                              nop);
8673         if (r)
8674                 return r;
8675         /* dGPU only have 1 MEC */
8676         ring->me = 1; /* first MEC */
8677         ring->pipe = 0; /* first pipe */
8678         ring->queue = 1; /* second queue */
8679         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8680
8681         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8682         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8683                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8684         if (r)
8685                 return r;
8686
8687         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8688         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8689                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8690         if (r)
8691                 return r;
8692
8693         r = cik_cp_resume(rdev);
8694         if (r)
8695                 return r;
8696
8697         r = cik_sdma_resume(rdev);
8698         if (r)
8699                 return r;
8700
8701         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8702         if (ring->ring_size) {
8703                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8704                                      RADEON_CP_PACKET2);
8705                 if (!r)
8706                         r = uvd_v1_0_init(rdev);
8707                 if (r)
8708                         DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8709         }
8710
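        /* -ENOENT is used as a sentinel: if neither VCE ring was sized
         * (VCE not available), init is silently skipped below; any real
         * ring init failure is reported instead.
         */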
8711         r = -ENOENT;
8712
8713         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8714         if (ring->ring_size)
8715                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8716                                      VCE_CMD_NO_OP);
8717
8718         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8719         if (ring->ring_size)
8720                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8721                                      VCE_CMD_NO_OP);
8722
8723         if (!r)
8724                 r = vce_v1_0_init(rdev);
8725         else if (r != -ENOENT)
8726                 DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8727
8728         r = radeon_ib_pool_init(rdev);
8729         if (r) {
8730                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8731                 return r;
8732         }
8733
8734         r = radeon_vm_manager_init(rdev);
8735         if (r) {
8736                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8737                 return r;
8738         }
8739
8740         r = radeon_audio_init(rdev);
8741         if (r)
8742                 return r;
8743
8744         r = radeon_kfd_resume(rdev);
8745         if (r)
8746                 return r;
8747
8748         return 0;
8749 }
8750
8751 /**
8752  * cik_resume - resume the asic to a functional state
8753  *
8754  * @rdev: radeon_device pointer
8755  *
8756  * Programs the asic to a functional state (CIK).
8757  * Called at resume.
8758  * Returns 0 for success, error for failure.
8759  */
8760 int cik_resume(struct radeon_device *rdev)
8761 {
8762         int r;
8763
8764         /* post card */
8765         atom_asic_init(rdev->mode_info.atom_context);
8766
8767         /* init golden registers */
8768         cik_init_golden_registers(rdev);
8769
8770         if (rdev->pm.pm_method == PM_METHOD_DPM)
8771                 radeon_pm_resume(rdev);
8772
8773         rdev->accel_working = true;
8774         r = cik_startup(rdev);
8775         if (r) {
8776                 DRM_ERROR("cik startup failed on resume\n");
8777                 rdev->accel_working = false;
8778                 return r;
8779         }
8780
8781         return r;
8783 }
8784
8785 /**
8786  * cik_suspend - suspend the asic
8787  *
8788  * @rdev: radeon_device pointer
8789  *
8790  * Bring the chip into a state suitable for suspend (CIK).
8791  * Called at suspend.
8792  * Returns 0 for success.
8793  */
8794 int cik_suspend(struct radeon_device *rdev)
8795 {
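        /* Tear down roughly in the reverse order of cik_startup(): stop the
         * engines first, then gating, interrupts, writeback and finally the
         * GART.
         */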
8796         radeon_kfd_suspend(rdev);
8797         radeon_pm_suspend(rdev);
8798         radeon_audio_fini(rdev);
8799         radeon_vm_manager_fini(rdev);
8800         cik_cp_enable(rdev, false);
8801         cik_sdma_enable(rdev, false);
8802         uvd_v1_0_fini(rdev);
8803         radeon_uvd_suspend(rdev);
8804         radeon_vce_suspend(rdev);
8805         cik_fini_pg(rdev);
8806         cik_fini_cg(rdev);
8807         cik_irq_suspend(rdev);
8808         radeon_wb_disable(rdev);
8809         cik_pcie_gart_disable(rdev);
8810         return 0;
8811 }
8812
8813 /* The plan is to move initialization into this function and use
8814  * helper functions so that radeon_device_init does pretty much
8815  * nothing more than call the asic-specific functions. This
8816  * should also allow us to remove a bunch of callback functions
8817  * like vram_info.
8818  */
8819 /**
8820  * cik_init - asic specific driver and hw init
8821  *
8822  * @rdev: radeon_device pointer
8823  *
8824  * Set up asic specific driver variables and program the hw
8825  * to a functional state (CIK).
8826  * Called at driver startup.
8827  * Returns 0 for success, errors for failure.
8828  */
8829 int cik_init(struct radeon_device *rdev)
8830 {
8831         struct radeon_ring *ring;
8832         int r;
8833
8834         /* Read BIOS */
8835         if (!radeon_get_bios(rdev)) {
8836                 if (ASIC_IS_AVIVO(rdev))
8837                         return -EINVAL;
8838         }
8839         /* Must be an ATOMBIOS */
8840         if (!rdev->is_atom_bios) {
8841                 dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
8842                 return -EINVAL;
8843         }
8844         r = radeon_atombios_init(rdev);
8845         if (r)
8846                 return r;
8847
8848         /* Post card if necessary */
8849         if (!radeon_card_posted(rdev)) {
8850                 if (!rdev->bios) {
8851                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8852                         return -EINVAL;
8853                 }
8854                 DRM_INFO("GPU not posted. posting now...\n");
8855                 atom_asic_init(rdev->mode_info.atom_context);
8856         }
8857         /* init golden registers */
8858         cik_init_golden_registers(rdev);
8859         /* Initialize scratch registers */
8860         cik_scratch_init(rdev);
8861         /* Initialize surface registers */
8862         radeon_surface_init(rdev);
8863         /* Initialize clocks */
8864         radeon_get_clock_info(rdev->ddev);
8865
8866         /* Fence driver */
8867         r = radeon_fence_driver_init(rdev);
8868         if (r)
8869                 return r;
8870
8871         /* initialize memory controller */
8872         r = cik_mc_init(rdev);
8873         if (r)
8874                 return r;
8875         /* Memory manager */
8876         r = radeon_bo_init(rdev);
8877         if (r)
8878                 return r;
8879
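        /* IGP parts carry no separate MC firmware, so mc_fw is only
         * required for dGPUs.
         */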
8880         if (rdev->flags & RADEON_IS_IGP) {
8881                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8882                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8883                         r = cik_init_microcode(rdev);
8884                         if (r) {
8885                                 DRM_ERROR("Failed to load firmware!\n");
8886                                 return r;
8887                         }
8888                 }
8889         } else {
8890                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8891                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8892                     !rdev->mc_fw) {
8893                         r = cik_init_microcode(rdev);
8894                         if (r) {
8895                                 DRM_ERROR("Failed to load firmware!\n");
8896                                 return r;
8897                         }
8898                 }
8899         }
8900
8901         /* Initialize power management */
8902         radeon_pm_init(rdev);
8903
8904         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8905         ring->ring_obj = NULL;
8906         r600_ring_init(rdev, ring, 1024 * 1024);
8907
8908         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8909         ring->ring_obj = NULL;
8910         r600_ring_init(rdev, ring, 1024 * 1024);
8911         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8912         if (r)
8913                 return r;
8914
8915         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8916         ring->ring_obj = NULL;
8917         r600_ring_init(rdev, ring, 1024 * 1024);
8918         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8919         if (r)
8920                 return r;
8921
8922         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8923         ring->ring_obj = NULL;
8924         r600_ring_init(rdev, ring, 256 * 1024);
8925
8926         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8927         ring->ring_obj = NULL;
8928         r600_ring_init(rdev, ring, 256 * 1024);
8929
8930         r = radeon_uvd_init(rdev);
8931         if (!r) {
8932                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8933                 ring->ring_obj = NULL;
8934                 r600_ring_init(rdev, ring, 4096);
8935         }
8936
8937         r = radeon_vce_init(rdev);
8938         if (!r) {
8939                 ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8940                 ring->ring_obj = NULL;
8941                 r600_ring_init(rdev, ring, 4096);
8942
8943                 ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8944                 ring->ring_obj = NULL;
8945                 r600_ring_init(rdev, ring, 4096);
8946         }
8947
8948         rdev->ih.ring_obj = NULL;
8949         r600_ih_ring_init(rdev, 64 * 1024);
8950
8951         r = r600_pcie_gart_init(rdev);
8952         if (r)
8953                 return r;
8954
8955         rdev->accel_working = true;
8956         r = cik_startup(rdev);
8957         if (r) {
8958                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8959                 cik_cp_fini(rdev);
8960                 cik_sdma_fini(rdev);
8961                 cik_irq_fini(rdev);
8962                 sumo_rlc_fini(rdev);
8963                 cik_mec_fini(rdev);
8964                 radeon_wb_fini(rdev);
8965                 radeon_ib_pool_fini(rdev);
8966                 radeon_vm_manager_fini(rdev);
8967                 radeon_irq_kms_fini(rdev);
8968                 cik_pcie_gart_fini(rdev);
8969                 rdev->accel_working = false;
8970         }
8971
8972         /* Don't start up if the MC ucode is missing.
8973          * The default clocks and voltages before the MC ucode
8974          * is loaded are not sufficient for advanced operations.
8975          */
8976         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8977                 DRM_ERROR("radeon: MC ucode required for CIK+.\n");
8978                 return -EINVAL;
8979         }
8980
8981         return 0;
8982 }
8983
8984 /**
8985  * cik_fini - asic specific driver and hw fini
8986  *
8987  * @rdev: radeon_device pointer
8988  *
8989  * Tear down the asic specific driver variables and program the hw
8990  * to an idle state (CIK).
8991  * Called at driver unload.
8992  */
8993 void cik_fini(struct radeon_device *rdev)
8994 {
8995         radeon_pm_fini(rdev);
8996         cik_cp_fini(rdev);
8997         cik_sdma_fini(rdev);
8998         cik_fini_pg(rdev);
8999         cik_fini_cg(rdev);
9000         cik_irq_fini(rdev);
9001         sumo_rlc_fini(rdev);
9002         cik_mec_fini(rdev);
9003         radeon_wb_fini(rdev);
9004         radeon_vm_manager_fini(rdev);
9005         radeon_ib_pool_fini(rdev);
9006         radeon_irq_kms_fini(rdev);
9007         uvd_v1_0_fini(rdev);
9008         radeon_uvd_fini(rdev);
9009         radeon_vce_fini(rdev);
9010         cik_pcie_gart_fini(rdev);
9011         r600_vram_scratch_fini(rdev);
9012         radeon_gem_fini(rdev);
9013         radeon_fence_driver_fini(rdev);
9014         radeon_bo_fini(rdev);
9015         radeon_atombios_fini(rdev);
9016         kfree(rdev->bios);
9017         rdev->bios = NULL;
9018 }
9019
9020 void dce8_program_fmt(struct drm_encoder *encoder)
9021 {
9022         struct drm_device *dev = encoder->dev;
9023         struct radeon_device *rdev = dev->dev_private;
9024         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
9025         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
9026         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
9027         int bpc = 0;
9028         u32 tmp = 0;
9029         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
9030
9031         if (connector) {
9032                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
9033                 bpc = radeon_get_monitor_bpc(connector);
9034                 dither = radeon_connector->dither;
9035         }
9036
9037         /* LVDS/eDP FMT is set up by atom */
9038         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
9039                 return;
9040
9041         /* not needed for analog */
9042         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
9043             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
9044                 return;
9045
9046         if (bpc == 0)
9047                 return;
9048
9049         switch (bpc) {
9050         case 6:
9051                 if (dither == RADEON_FMT_DITHER_ENABLE)
9052                         /* XXX sort out optimal dither settings */
9053                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9054                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
9055                 else
9056                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
9057                 break;
9058         case 8:
9059                 if (dither == RADEON_FMT_DITHER_ENABLE)
9060                         /* XXX sort out optimal dither settings */
9061                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9062                                 FMT_RGB_RANDOM_ENABLE |
9063                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
9064                 else
9065                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
9066                 break;
9067         case 10:
9068                 if (dither == RADEON_FMT_DITHER_ENABLE)
9069                         /* XXX sort out optimal dither settings */
9070                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9071                                 FMT_RGB_RANDOM_ENABLE |
9072                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
9073                 else
9074                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
9075                 break;
9076         default:
9077                 /* not needed */
9078                 break;
9079         }
9080
9081         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
9082 }
9083
9084 /* display watermark setup */
9085 /**
9086  * dce8_line_buffer_adjust - Set up the line buffer
9087  *
9088  * @rdev: radeon_device pointer
9089  * @radeon_crtc: the selected display controller
9090  * @mode: the current display mode on the selected display
9091  * controller
9092  *
9093  * Set up the line buffer allocation for
9094  * the selected display controller (CIK).
9095  * Returns the line buffer size in pixels.
9096  */
9097 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
9098                                    struct radeon_crtc *radeon_crtc,
9099                                    struct drm_display_mode *mode)
9100 {
9101         u32 tmp, buffer_alloc, i;
9102         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
9103         /*
9104          * Line Buffer Setup
9105          * There are 6 line buffers, one for each display controller.
9106          * There are 3 partitions per LB. Select the number of partitions
9107          * to enable based on the display width.  For display widths larger
9108          * than 4096, you need to use 2 display controllers and combine
9109          * them using the stereo blender.
9110          */
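        /* tmp below is the LB_MEMORY_CONFIG value; it maps to the usable
         * line buffer size returned at the end of this function
         * (0 -> 4096*2, 1 -> 1920*2, 2 -> 2560*2 pixels).
         */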
9111         if (radeon_crtc->base.enabled && mode) {
9112                 if (mode->crtc_hdisplay < 1920) {
9113                         tmp = 1;
9114                         buffer_alloc = 2;
9115                 } else if (mode->crtc_hdisplay < 2560) {
9116                         tmp = 2;
9117                         buffer_alloc = 2;
9118                 } else if (mode->crtc_hdisplay < 4096) {
9119                         tmp = 0;
9120                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9121                 } else {
9122                         DRM_DEBUG_KMS("Mode too big for LB!\n");
9123                         tmp = 0;
9124                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9125                 }
9126         } else {
9127                 tmp = 1;
9128                 buffer_alloc = 0;
9129         }
9130
9131         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
9132                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
9133
9134         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
9135                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
9136         for (i = 0; i < rdev->usec_timeout; i++) {
9137                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
9138                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
9139                         break;
9140                 udelay(1);
9141         }
9142
9143         if (radeon_crtc->base.enabled && mode) {
9144                 switch (tmp) {
9145                 case 0:
9146                 default:
9147                         return 4096 * 2;
9148                 case 1:
9149                         return 1920 * 2;
9150                 case 2:
9151                         return 2560 * 2;
9152                 }
9153         }
9154
9155         /* controller not enabled, so no lb used */
9156         return 0;
9157 }
9158
9159 /**
9160  * cik_get_number_of_dram_channels - get the number of dram channels
9161  *
9162  * @rdev: radeon_device pointer
9163  *
9164  * Look up the number of video ram channels (CIK).
9165  * Used for display watermark bandwidth calculations
9166  * Returns the number of dram channels
9167  */
9168 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
9169 {
9170         u32 tmp = RREG32(MC_SHARED_CHMAP);
9171
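        /* NOOFCHAN is a hardware encoding, not a literal channel count */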
9172         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
9173         case 0:
9174         default:
9175                 return 1;
9176         case 1:
9177                 return 2;
9178         case 2:
9179                 return 4;
9180         case 3:
9181                 return 8;
9182         case 4:
9183                 return 3;
9184         case 5:
9185                 return 6;
9186         case 6:
9187                 return 10;
9188         case 7:
9189                 return 12;
9190         case 8:
9191                 return 16;
9192         }
9193 }
9194
9195 struct dce8_wm_params {
9196         u32 dram_channels; /* number of dram channels */
9197         u32 yclk;          /* bandwidth per dram data pin in kHz */
9198         u32 sclk;          /* engine clock in kHz */
9199         u32 disp_clk;      /* display clock in kHz */
9200         u32 src_width;     /* viewport width */
9201         u32 active_time;   /* active display time in ns */
9202         u32 blank_time;    /* blank time in ns */
9203         bool interlaced;    /* mode is interlaced */
9204         fixed20_12 vsc;    /* vertical scale ratio */
9205         u32 num_heads;     /* number of active crtcs */
9206         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
9207         u32 lb_size;       /* line buffer allocated to pipe */
9208         u32 vtaps;         /* vertical scaler taps */
9209 };
9210
9211 /**
9212  * dce8_dram_bandwidth - get the dram bandwidth
9213  *
9214  * @wm: watermark calculation data
9215  *
9216  * Calculate the raw dram bandwidth (CIK).
9217  * Used for display watermark bandwidth calculations
9218  * Returns the dram bandwidth in MBytes/s
9219  */
9220 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9221 {
9222         /* Calculate raw DRAM Bandwidth */
9223         fixed20_12 dram_efficiency; /* 0.7 */
9224         fixed20_12 yclk, dram_channels, bandwidth;
9225         fixed20_12 a;
9226
9227         a.full = dfixed_const(1000);
9228         yclk.full = dfixed_const(wm->yclk);
9229         yclk.full = dfixed_div(yclk, a);
9230         dram_channels.full = dfixed_const(wm->dram_channels * 4);
9231         a.full = dfixed_const(10);
9232         dram_efficiency.full = dfixed_const(7);
9233         dram_efficiency.full = dfixed_div(dram_efficiency, a);
9234         bandwidth.full = dfixed_mul(dram_channels, yclk);
9235         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
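        /* e.g. yclk = 1000000 (1 GHz effective, in kHz) and 2 channels:
         * 1000 * (2 * 4 bytes) * 0.7 = 5600 MBytes/s
         */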
9236
9237         return dfixed_trunc(bandwidth);
9238 }
9239
9240 /**
9241  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9242  *
9243  * @wm: watermark calculation data
9244  *
9245  * Calculate the dram bandwidth used for display (CIK).
9246  * Used for display watermark bandwidth calculations
9247  * Returns the dram bandwidth for display in MBytes/s
9248  */
9249 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9250 {
9251         /* Calculate DRAM Bandwidth and the part allocated to display. */
9252         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9253         fixed20_12 yclk, dram_channels, bandwidth;
9254         fixed20_12 a;
9255
9256         a.full = dfixed_const(1000);
9257         yclk.full = dfixed_const(wm->yclk);
9258         yclk.full = dfixed_div(yclk, a);
9259         dram_channels.full = dfixed_const(wm->dram_channels * 4);
9260         a.full = dfixed_const(10);
9261         disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
9262         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9263         bandwidth.full = dfixed_mul(dram_channels, yclk);
9264         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9265
9266         return dfixed_trunc(bandwidth);
9267 }
9268
9269 /**
9270  * dce8_data_return_bandwidth - get the data return bandwidth
9271  *
9272  * @wm: watermark calculation data
9273  *
9274  * Calculate the data return bandwidth used for display (CIK).
9275  * Used for display watermark bandwidth calculations
9276  * Returns the data return bandwidth in MBytes/s
9277  */
9278 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9279 {
9280         /* Calculate the display Data return Bandwidth */
9281         fixed20_12 return_efficiency; /* 0.8 */
9282         fixed20_12 sclk, bandwidth;
9283         fixed20_12 a;
9284
9285         a.full = dfixed_const(1000);
9286         sclk.full = dfixed_const(wm->sclk);
9287         sclk.full = dfixed_div(sclk, a);
9288         a.full = dfixed_const(10);
9289         return_efficiency.full = dfixed_const(8);
9290         return_efficiency.full = dfixed_div(return_efficiency, a);
9291         a.full = dfixed_const(32);
9292         bandwidth.full = dfixed_mul(a, sclk);
9293         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
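        /* e.g. sclk = 800000 kHz: 800 * 32 bytes * 0.8 = 20480 MBytes/s */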
9294
9295         return dfixed_trunc(bandwidth);
9296 }
9297
9298 /**
9299  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9300  *
9301  * @wm: watermark calculation data
9302  *
9303  * Calculate the dmif bandwidth used for display (CIK).
9304  * Used for display watermark bandwidth calculations
9305  * Returns the dmif bandwidth in MBytes/s
9306  */
9307 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9308 {
9309         /* Calculate the DMIF Request Bandwidth */
9310         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9311         fixed20_12 disp_clk, bandwidth;
9312         fixed20_12 a, b;
9313
9314         a.full = dfixed_const(1000);
9315         disp_clk.full = dfixed_const(wm->disp_clk);
9316         disp_clk.full = dfixed_div(disp_clk, a);
9317         a.full = dfixed_const(32);
9318         b.full = dfixed_mul(a, disp_clk);
9319
9320         a.full = dfixed_const(10);
9321         disp_clk_request_efficiency.full = dfixed_const(8);
9322         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9323
9324         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9325
9326         return dfixed_trunc(bandwidth);
9327 }
9328
9329 /**
9330  * dce8_available_bandwidth - get the min available bandwidth
9331  *
9332  * @wm: watermark calculation data
9333  *
9334  * Calculate the min available bandwidth used for display (CIK).
9335  * Used for display watermark bandwidth calculations
9336  * Returns the min available bandwidth in MBytes/s
9337  */
9338 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9339 {
9340         /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
9341         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9342         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9343         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9344
9345         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9346 }
9347
9348 /**
9349  * dce8_average_bandwidth - get the average available bandwidth
9350  *
9351  * @wm: watermark calculation data
9352  *
9353  * Calculate the average available bandwidth used for display (CIK).
9354  * Used for display watermark bandwidth calculations
9355  * Returns the average available bandwidth in MBytes/s
9356  */
9357 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9358 {
9359         /* Calculate the display mode Average Bandwidth
9360          * DisplayMode should contain the source and destination dimensions,
9361          * timing, etc.
9362          */
9363         fixed20_12 bpp;
9364         fixed20_12 line_time;
9365         fixed20_12 src_width;
9366         fixed20_12 bandwidth;
9367         fixed20_12 a;
9368
9369         a.full = dfixed_const(1000);
9370         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9371         line_time.full = dfixed_div(line_time, a);
9372         bpp.full = dfixed_const(wm->bytes_per_pixel);
9373         src_width.full = dfixed_const(wm->src_width);
9374         bandwidth.full = dfixed_mul(src_width, bpp);
9375         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9376         bandwidth.full = dfixed_div(bandwidth, line_time);
9377
9378         return dfixed_trunc(bandwidth);
9379 }
9380
9381 /**
9382  * dce8_latency_watermark - get the latency watermark
9383  *
9384  * @wm: watermark calculation data
9385  *
9386  * Calculate the latency watermark (CIK).
9387  * Used for display watermark bandwidth calculations
9388  * Returns the latency watermark in ns
9389  */
9390 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9391 {
9392         /* First calculate the latency in ns */
9393         u32 mc_latency = 2000; /* 2000 ns. */
9394         u32 available_bandwidth = dce8_available_bandwidth(wm);
9395         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9396         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9397         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9398         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9399                 (wm->num_heads * cursor_line_pair_return_time);
9400         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9401         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9402         u32 tmp, dmif_size = 12288;
9403         fixed20_12 a, b, c;
9404
9405         if (wm->num_heads == 0)
9406                 return 0;
9407
9408         a.full = dfixed_const(2);
9409         b.full = dfixed_const(1);
9410         if ((wm->vsc.full > a.full) ||
9411             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9412             (wm->vtaps >= 5) ||
9413             ((wm->vsc.full >= a.full) && wm->interlaced))
9414                 max_src_lines_per_dst_line = 4;
9415         else
9416                 max_src_lines_per_dst_line = 2;
9417
9418         a.full = dfixed_const(available_bandwidth);
9419         b.full = dfixed_const(wm->num_heads);
9420         a.full = dfixed_div(a, b);
9421
9422         b.full = dfixed_const(mc_latency + 512);
9423         c.full = dfixed_const(wm->disp_clk);
9424         b.full = dfixed_div(b, c);
9425
9426         c.full = dfixed_const(dmif_size);
9427         b.full = dfixed_div(c, b);
9428
9429         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9430
9431         b.full = dfixed_const(1000);
9432         c.full = dfixed_const(wm->disp_clk);
9433         b.full = dfixed_div(c, b);
9434         c.full = dfixed_const(wm->bytes_per_pixel);
9435         b.full = dfixed_mul(b, c);
9436
9437         lb_fill_bw = min(tmp, dfixed_trunc(b));
9438
9439         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9440         b.full = dfixed_const(1000);
9441         c.full = dfixed_const(lb_fill_bw);
9442         b.full = dfixed_div(c, b);
9443         a.full = dfixed_div(a, b);
9444         line_fill_time = dfixed_trunc(a);
9445
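        /* if the line buffer refills faster than a line is scanned out,
         * the watermark is just the raw latency; otherwise the extra fill
         * time is added on top
         */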
9446         if (line_fill_time < wm->active_time)
9447                 return latency;
9448         else
9449                 return latency + (line_fill_time - wm->active_time);
9451 }
9452
9453 /**
9454  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9455  * average and available dram bandwidth
9456  *
9457  * @wm: watermark calculation data
9458  *
9459  * Check if the display average bandwidth fits in the display
9460  * dram bandwidth (CIK).
9461  * Used for display watermark bandwidth calculations
9462  * Returns true if the display fits, false if not.
9463  */
9464 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9465 {
9466         if (dce8_average_bandwidth(wm) <=
9467             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9468                 return true;
9469         else
9470                 return false;
9471 }
9472
9473 /**
9474  * dce8_average_bandwidth_vs_available_bandwidth - check
9475  * average and available bandwidth
9476  *
9477  * @wm: watermark calculation data
9478  *
9479  * Check if the display average bandwidth fits in the display
9480  * available bandwidth (CIK).
9481  * Used for display watermark bandwidth calculations
9482  * Returns true if the display fits, false if not.
9483  */
9484 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9485 {
9486         if (dce8_average_bandwidth(wm) <=
9487             (dce8_available_bandwidth(wm) / wm->num_heads))
9488                 return true;
9489         else
9490                 return false;
9491 }
9492
9493 /**
9494  * dce8_check_latency_hiding - check latency hiding
9495  *
9496  * @wm: watermark calculation data
9497  *
9498  * Check latency hiding (CIK).
9499  * Used for display watermark bandwidth calculations
9500  * Returns true if the display fits, false if not.
9501  */
9502 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9503 {
9504         u32 lb_partitions = wm->lb_size / wm->src_width;
9505         u32 line_time = wm->active_time + wm->blank_time;
9506         u32 latency_tolerant_lines;
9507         u32 latency_hiding;
9508         fixed20_12 a;
9509
9510         a.full = dfixed_const(1);
9511         if (wm->vsc.full > a.full)
9512                 latency_tolerant_lines = 1;
9513         else {
9514                 if (lb_partitions <= (wm->vtaps + 1))
9515                         latency_tolerant_lines = 1;
9516                 else
9517                         latency_tolerant_lines = 2;
9518         }
9519
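        /* time (in ns) that the buffered lines plus the blanking period
         * can hide memory latency
         */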
9520         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9521
9522         if (dce8_latency_watermark(wm) <= latency_hiding)
9523                 return true;
9524         else
9525                 return false;
9526 }
9527
9528 /**
9529  * dce8_program_watermarks - program display watermarks
9530  *
9531  * @rdev: radeon_device pointer
9532  * @radeon_crtc: the selected display controller
9533  * @lb_size: line buffer size
9534  * @num_heads: number of display controllers in use
9535  *
9536  * Calculate and program the display watermarks for the
9537  * selected display controller (CIK).
9538  */
9539 static void dce8_program_watermarks(struct radeon_device *rdev,
9540                                     struct radeon_crtc *radeon_crtc,
9541                                     u32 lb_size, u32 num_heads)
9542 {
9543         struct drm_display_mode *mode = &radeon_crtc->base.mode;
9544         struct dce8_wm_params wm_low, wm_high;
9545         u32 pixel_period;
9546         u32 line_time = 0;
9547         u32 latency_watermark_a = 0, latency_watermark_b = 0;
9548         u32 tmp, wm_mask;
9549
9550         if (radeon_crtc->base.enabled && num_heads && mode) {
9551                 pixel_period = 1000000 / (u32)mode->clock;
9552                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
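                /* mode->clock is in kHz, so pixel_period and line_time are
                 * in ns; line_time is clamped to fit the 16-bit
                 * LATENCY_HIGH_WATERMARK field programmed below
                 */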
9553
9554                 /* watermark for high clocks */
9555                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9556                     rdev->pm.dpm_enabled) {
9557                         wm_high.yclk =
9558                                 radeon_dpm_get_mclk(rdev, false) * 10;
9559                         wm_high.sclk =
9560                                 radeon_dpm_get_sclk(rdev, false) * 10;
9561                 } else {
9562                         wm_high.yclk = rdev->pm.current_mclk * 10;
9563                         wm_high.sclk = rdev->pm.current_sclk * 10;
9564                 }
9565
9566                 wm_high.disp_clk = mode->clock;
9567                 wm_high.src_width = mode->crtc_hdisplay;
9568                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9569                 wm_high.blank_time = line_time - wm_high.active_time;
9570                 wm_high.interlaced = false;
9571                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9572                         wm_high.interlaced = true;
9573                 wm_high.vsc = radeon_crtc->vsc;
9574                 wm_high.vtaps = 1;
9575                 if (radeon_crtc->rmx_type != RMX_OFF)
9576                         wm_high.vtaps = 2;
9577                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9578                 wm_high.lb_size = lb_size;
9579                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9580                 wm_high.num_heads = num_heads;
9581
9582                 /* set for high clocks */
9583                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9584
9585                 /* possibly force display priority to high */
9586                 /* should really do this at mode validation time... */
9587                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9588                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9589                     !dce8_check_latency_hiding(&wm_high) ||
9590                     (rdev->disp_priority == 2)) {
9591                         DRM_DEBUG_KMS("force priority to high\n");
9592                 }
9593
9594                 /* watermark for low clocks */
9595                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9596                     rdev->pm.dpm_enabled) {
9597                         wm_low.yclk =
9598                                 radeon_dpm_get_mclk(rdev, true) * 10;
9599                         wm_low.sclk =
9600                                 radeon_dpm_get_sclk(rdev, true) * 10;
9601                 } else {
9602                         wm_low.yclk = rdev->pm.current_mclk * 10;
9603                         wm_low.sclk = rdev->pm.current_sclk * 10;
9604                 }
9605
9606                 wm_low.disp_clk = mode->clock;
9607                 wm_low.src_width = mode->crtc_hdisplay;
9608                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9609                 wm_low.blank_time = line_time - wm_low.active_time;
9610                 wm_low.interlaced = false;
9611                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9612                         wm_low.interlaced = true;
9613                 wm_low.vsc = radeon_crtc->vsc;
9614                 wm_low.vtaps = 1;
9615                 if (radeon_crtc->rmx_type != RMX_OFF)
9616                         wm_low.vtaps = 2;
9617                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9618                 wm_low.lb_size = lb_size;
9619                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9620                 wm_low.num_heads = num_heads;
9621
9622                 /* set for low clocks */
9623                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9624
9625                 /* possibly force display priority to high */
9626                 /* should really do this at mode validation time... */
9627                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9628                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9629                     !dce8_check_latency_hiding(&wm_low) ||
9630                     (rdev->disp_priority == 2)) {
9631                         DRM_DEBUG_KMS("force priority to high\n");
9632                 }
9633         }
9634
9635         /* select wm A */
9636         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9637         tmp = wm_mask;
9638         tmp &= ~LATENCY_WATERMARK_MASK(3);
9639         tmp |= LATENCY_WATERMARK_MASK(1);
9640         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9641         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9642                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9643                 LATENCY_HIGH_WATERMARK(line_time)));
9644         /* select wm B */
9645         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9646         tmp &= ~LATENCY_WATERMARK_MASK(3);
9647         tmp |= LATENCY_WATERMARK_MASK(2);
9648         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9649         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9650                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9651                 LATENCY_HIGH_WATERMARK(line_time)));
9652         /* restore original selection */
9653         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9654
9655         /* save values for DPM */
9656         radeon_crtc->line_time = line_time;
9657         radeon_crtc->wm_high = latency_watermark_a;
9658         radeon_crtc->wm_low = latency_watermark_b;
9659 }
9660
9661 /**
9662  * dce8_bandwidth_update - program display watermarks
9663  *
9664  * @rdev: radeon_device pointer
9665  *
9666  * Calculate and program the display watermarks and line
9667  * buffer allocation (CIK).
9668  */
9669 void dce8_bandwidth_update(struct radeon_device *rdev)
9670 {
9671         struct drm_display_mode *mode = NULL;
9672         u32 num_heads = 0, lb_size;
9673         int i;
9674
9675         if (!rdev->mode_info.mode_config_initialized)
9676                 return;
9677
9678         radeon_update_display_priority(rdev);
9679
9680         for (i = 0; i < rdev->num_crtc; i++) {
9681                 if (rdev->mode_info.crtcs[i]->base.enabled)
9682                         num_heads++;
9683         }
9684         for (i = 0; i < rdev->num_crtc; i++) {
9685                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9686                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9687                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9688         }
9689 }
9690
9691 /**
9692  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9693  *
9694  * @rdev: radeon_device pointer
9695  *
9696  * Fetches a GPU clock counter snapshot (CIK).
9697  * Returns the 64 bit clock counter snapshot.
9698  */
9699 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9700 {
9701         uint64_t clock;
9702
9703         mutex_lock(&rdev->gpu_clock_mutex);
9704         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
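        /* writing the capture bit latches the full 64-bit counter so the
         * two halves read below are consistent
         */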
9705         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9706                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9707         mutex_unlock(&rdev->gpu_clock_mutex);
9708         return clock;
9709 }
9710
9711 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9712                               u32 cntl_reg, u32 status_reg)
9713 {
9714         int r, i;
9715         struct atom_clock_dividers dividers;
9716         uint32_t tmp;
9717
9718         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9719                                            clock, false, &dividers);
9720         if (r)
9721                 return r;
9722
9723         tmp = RREG32_SMC(cntl_reg);
9724         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9725         tmp |= dividers.post_divider;
9726         WREG32_SMC(cntl_reg, tmp);
9727
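        /* wait up to ~1s (100 x 10ms) for the new divider to take effect */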
9728         for (i = 0; i < 100; i++) {
9729                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9730                         break;
9731                 mdelay(10);
9732         }
9733         if (i == 100)
9734                 return -ETIMEDOUT;
9735
9736         return 0;
9737 }
9738
9739 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9740 {
9741         int r = 0;
9742
9743         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9744         if (r)
9745                 return r;
9746
9747         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9748         return r;
9749 }
9750
9751 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9752 {
9753         int r, i;
9754         struct atom_clock_dividers dividers;
9755         u32 tmp;
9756
9757         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9758                                            ecclk, false, &dividers);
9759         if (r)
9760                 return r;
9761
9762         for (i = 0; i < 100; i++) {
9763                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9764                         break;
9765                 mdelay(10);
9766         }
9767         if (i == 100)
9768                 return -ETIMEDOUT;
9769
9770         tmp = RREG32_SMC(CG_ECLK_CNTL);
9771         tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9772         tmp |= dividers.post_divider;
9773         WREG32_SMC(CG_ECLK_CNTL, tmp);
9774
9775         for (i = 0; i < 100; i++) {
9776                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9777                         break;
9778                 mdelay(10);
9779         }
9780         if (i == 100)
9781                 return -ETIMEDOUT;
9782
9783         return 0;
9784 }
9785
9786 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9787 {
9788         struct pci_dev *root = rdev->pdev->bus->self;
9789         int bridge_pos, gpu_pos;
9790         u32 speed_cntl, mask, current_data_rate;
9791         int ret, i;
9792         u16 tmp16;
9793
9794         if (pci_is_root_bus(rdev->pdev->bus))
9795                 return;
9796
9797         if (radeon_pcie_gen2 == 0)
9798                 return;
9799
9800         if (rdev->flags & RADEON_IS_IGP)
9801                 return;
9802
9803         if (!(rdev->flags & RADEON_IS_PCIE))
9804                 return;
9805
9806         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9807         if (ret != 0)
9808                 return;
9809
9810         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9811                 return;
9812
9813         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9814         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9815                 LC_CURRENT_DATA_RATE_SHIFT;
9816         if (mask & DRM_PCIE_SPEED_80) {
9817                 if (current_data_rate == 2) {
9818                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9819                         return;
9820                 }
9821                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9822         } else if (mask & DRM_PCIE_SPEED_50) {
9823                 if (current_data_rate == 1) {
9824                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9825                         return;
9826                 }
9827                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9828         }
9829
9830         bridge_pos = pci_pcie_cap(root);
9831         if (!bridge_pos)
9832                 return;
9833
9834         gpu_pos = pci_pcie_cap(rdev->pdev);
9835         if (!gpu_pos)
9836                 return;
9837
9838         if (mask & DRM_PCIE_SPEED_80) {
9839                 /* re-try equalization if gen3 is not already enabled */
9840                 if (current_data_rate != 2) {
9841                         u16 bridge_cfg, gpu_cfg;
9842                         u16 bridge_cfg2, gpu_cfg2;
9843                         u32 max_lw, current_lw, tmp;
9844
9845                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9846                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9847
9848                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9849                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9850
9851                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9852                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9853
9854                         tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9855                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9856                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9857
9858                         if (current_lw < max_lw) {
9859                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9860                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9861                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9862                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9863                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9864                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9865                                 }
9866                         }
9867
9868                         for (i = 0; i < 10; i++) {
9869                                 /* check status */
9870                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9871                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9872                                         break;
9873
9874                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9875                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9876
9877                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9878                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9879
9880                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9881                                 tmp |= LC_SET_QUIESCE;
9882                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9883
9884                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9885                                 tmp |= LC_REDO_EQ;
9886                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9887
9888                                 mdelay(100);
9889
9890                                 /* linkctl */
9891                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9892                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9893                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9894                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9895
9896                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9897                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9898                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9899                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9900
9901                                 /* linkctl2 */
9902                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9903                                 tmp16 &= ~((1 << 4) | (7 << 9));
9904                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9905                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9906
9907                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9908                                 tmp16 &= ~((1 << 4) | (7 << 9));
9909                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9910                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9911
9912                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9913                                 tmp &= ~LC_SET_QUIESCE;
9914                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9915                         }
9916                 }
9917         }
9918
9919         /* set the link speed */
9920         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9921         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9922         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9923
9924         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9925         tmp16 &= ~0xf;
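        /* bits 3:0 of LNKCTL2 hold the target link speed
         * (1/2/3 -> 2.5/5.0/8.0 GT/s)
         */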
9926         if (mask & DRM_PCIE_SPEED_80)
9927                 tmp16 |= 3; /* gen3 */
9928         else if (mask & DRM_PCIE_SPEED_50)
9929                 tmp16 |= 2; /* gen2 */
9930         else
9931                 tmp16 |= 1; /* gen1 */
9932         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9933
9934         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9935         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9936         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9937
9938         for (i = 0; i < rdev->usec_timeout; i++) {
9939                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9940                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9941                         break;
9942                 udelay(1);
9943         }
9944 }
9945
9946 static void cik_program_aspm(struct radeon_device *rdev)
9947 {
9948         u32 data, orig;
9949         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9950         bool disable_clkreq = false;
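        /* the disable_* flags above are policy knobs; as initialized, none
         * is set, so L0s, L1, PLL power-down in L1 and CLKREQ handling are
         * all attempted below
         */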
9951
9952         if (radeon_aspm == 0)
9953                 return;
9954
9955         /* XXX double check IGPs */
9956         if (rdev->flags & RADEON_IS_IGP)
9957                 return;
9958
9959         if (!(rdev->flags & RADEON_IS_PCIE))
9960                 return;
9961
9962         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9963         data &= ~LC_XMIT_N_FTS_MASK;
9964         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9965         if (orig != data)
9966                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9967
9968         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9969         data |= LC_GO_TO_RECOVERY;
9970         if (orig != data)
9971                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9972
9973         orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9974         data |= P_IGNORE_EDB_ERR;
9975         if (orig != data)
9976                 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9977
9978         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9979         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9980         data |= LC_PMI_TO_L1_DIS;
9981         if (!disable_l0s)
9982                 data |= LC_L0S_INACTIVITY(7);
9983
9984         if (!disable_l1) {
9985                 data |= LC_L1_INACTIVITY(7);
9986                 data &= ~LC_PMI_TO_L1_DIS;
9987                 if (orig != data)
9988                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9989
9990                 if (!disable_plloff_in_l1) {
9991                         bool clk_req_support;
9992
9993                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9994                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9995                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9996                         if (orig != data)
9997                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9998
9999                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
10000                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
10001                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
10002                         if (orig != data)
10003                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
10004
10005                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
10006                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
10007                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
10008                         if (orig != data)
10009                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
10010
10011                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
10012                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
10013                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
10014                         if (orig != data)
10015                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
10016
10017                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
10018                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
10019                         data |= LC_DYN_LANES_PWR_STATE(3);
10020                         if (orig != data)
10021                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
10022
10023                         if (!disable_clkreq &&
10024                             !pci_is_root_bus(rdev->pdev->bus)) {
10025                                 struct pci_dev *root = rdev->pdev->bus->self;
10026                                 u32 lnkcap;
10027
10028                                 clk_req_support = false;
10029                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
10030                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
10031                                         clk_req_support = true;
10032                         } else {
10033                                 clk_req_support = false;
10034                         }
10035
10036                         if (clk_req_support) {
10037                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
10038                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
10039                                 if (orig != data)
10040                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
10041
10042                                 orig = data = RREG32_SMC(THM_CLK_CNTL);
10043                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
10044                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
10045                                 if (orig != data)
10046                                         WREG32_SMC(THM_CLK_CNTL, data);
10047
10048                                 orig = data = RREG32_SMC(MISC_CLK_CTRL);
10049                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
10050                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
10051                                 if (orig != data)
10052                                         WREG32_SMC(MISC_CLK_CTRL, data);
10053
10054                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
10055                                 data &= ~BCLK_AS_XCLK;
10056                                 if (orig != data)
10057                                         WREG32_SMC(CG_CLKPIN_CNTL, data);
10058
10059                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
10060                                 data &= ~FORCE_BIF_REFCLK_EN;
10061                                 if (orig != data)
10062                                         WREG32_SMC(CG_CLKPIN_CNTL_2, data);
10063
10064                                 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
10065                                 data &= ~MPLL_CLKOUT_SEL_MASK;
10066                                 data |= MPLL_CLKOUT_SEL(4);
10067                                 if (orig != data)
10068                                         WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
10069                         }
10070                 }
10071         } else {
10072                 if (orig != data)
10073                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
10074         }
10075
10076         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
10077         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
10078         if (orig != data)
10079                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
10080
10081         if (!disable_l0s) {
10082                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
10083                 if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
10084                         data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
10085                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
10086                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
10087                                 data &= ~LC_L0S_INACTIVITY_MASK;
10088                                 if (orig != data)
10089                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
10090                         }
10091                 }
10092         }
10093 }