/* Source: drivers/gpu/drm/radeon/cik.c (Linux kernel, radeon driver) */
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36
/* Default SH_MEM_CONFIG value: only the alignment-mode field is set
 * (unaligned).  NOTE(review): consumers are elsewhere in this file —
 * presumably the GFX/compute shared-memory setup; confirm at use sites.
 */
#define SH_MEM_CONFIG_GFX_DEFAULT \
	ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
39
/*
 * Firmware images this driver may request.  Each ASIC family appears
 * twice: once with legacy ALL-CAPS file names and once with newer
 * lowercase names — presumably the microcode loaders try one scheme and
 * fall back to the other (verify in the *_init_microcode paths, which
 * are not visible in this chunk).  Note the lowercase sets add extra
 * images (e.g. *_k_smc.bin, kaveri_mec2.bin) that have no ALL-CAPS
 * counterpart here.
 */
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");
MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");

MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");
MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");

MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");
122
/*
 * Forward declarations.  The extern helpers are implemented elsewhere in
 * the radeon driver (the r600_/evergreen_/si_/sumo_/cik_sdma_/vce_
 * prefixes indicate their home modules); the static prototypes are for
 * functions defined later in this file.
 */
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);
147
148 /**
149  * cik_get_allowed_info_register - fetch the register for the info ioctl
150  *
151  * @rdev: radeon_device pointer
152  * @reg: register offset in bytes
153  * @val: register value
154  *
155  * Returns 0 for success or -EINVAL for an invalid register
156  *
157  */
158 int cik_get_allowed_info_register(struct radeon_device *rdev,
159                                   u32 reg, u32 *val)
160 {
161         switch (reg) {
162         case GRBM_STATUS:
163         case GRBM_STATUS2:
164         case GRBM_STATUS_SE0:
165         case GRBM_STATUS_SE1:
166         case GRBM_STATUS_SE2:
167         case GRBM_STATUS_SE3:
168         case SRBM_STATUS:
169         case SRBM_STATUS2:
170         case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
171         case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
172         case UVD_STATUS:
173         /* TODO VCE */
174                 *val = RREG32(reg);
175                 return 0;
176         default:
177                 return -EINVAL;
178         }
179 }
180
181 /*
182  * Indirect registers accessor
183  */
184 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
185 {
186         unsigned long flags;
187         u32 r;
188
189         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
190         WREG32(CIK_DIDT_IND_INDEX, (reg));
191         r = RREG32(CIK_DIDT_IND_DATA);
192         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
193         return r;
194 }
195
196 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
197 {
198         unsigned long flags;
199
200         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
201         WREG32(CIK_DIDT_IND_INDEX, (reg));
202         WREG32(CIK_DIDT_IND_DATA, (v));
203         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
204 }
205
206 /* get temperature in millidegrees */
207 int ci_get_temp(struct radeon_device *rdev)
208 {
209         u32 temp;
210         int actual_temp = 0;
211
212         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
213                 CTF_TEMP_SHIFT;
214
215         if (temp & 0x200)
216                 actual_temp = 255;
217         else
218                 actual_temp = temp & 0x1ff;
219
220         actual_temp = actual_temp * 1000;
221
222         return actual_temp;
223 }
224
225 /* get temperature in millidegrees */
226 int kv_get_temp(struct radeon_device *rdev)
227 {
228         u32 temp;
229         int actual_temp = 0;
230
231         temp = RREG32_SMC(0xC0300E0C);
232
233         if (temp)
234                 actual_temp = (temp / 8) - 49;
235         else
236                 actual_temp = 0;
237
238         actual_temp = actual_temp * 1000;
239
240         return actual_temp;
241 }
242
243 /*
244  * Indirect registers accessor
245  */
246 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
247 {
248         unsigned long flags;
249         u32 r;
250
251         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
252         WREG32(PCIE_INDEX, reg);
253         (void)RREG32(PCIE_INDEX);
254         r = RREG32(PCIE_DATA);
255         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
256         return r;
257 }
258
259 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
260 {
261         unsigned long flags;
262
263         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
264         WREG32(PCIE_INDEX, reg);
265         (void)RREG32(PCIE_INDEX);
266         WREG32(PCIE_DATA, v);
267         (void)RREG32(PCIE_DATA);
268         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
269 }
270
/*
 * RLC save/restore register list for Spectre (Kaveri GFX).
 * Most entries are pairs: a selector word of the form
 * (broadcast/SE-SH select << 16) | (register byte offset >> 2), followed
 * by a 0x00000000 value slot.  The bare counts (0x3, 0x5) appear to
 * introduce sections of the list with a different entry layout —
 * NOTE(review): confirm the exact consumer format against the RLC
 * save/restore setup code, which is not visible in this chunk.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
717
718 static const u32 kalindi_rlc_save_restore_register_list[] =
719 {
720         (0x0e00 << 16) | (0xc12c >> 2),
721         0x00000000,
722         (0x0e00 << 16) | (0xc140 >> 2),
723         0x00000000,
724         (0x0e00 << 16) | (0xc150 >> 2),
725         0x00000000,
726         (0x0e00 << 16) | (0xc15c >> 2),
727         0x00000000,
728         (0x0e00 << 16) | (0xc168 >> 2),
729         0x00000000,
730         (0x0e00 << 16) | (0xc170 >> 2),
731         0x00000000,
732         (0x0e00 << 16) | (0xc204 >> 2),
733         0x00000000,
734         (0x0e00 << 16) | (0xc2b4 >> 2),
735         0x00000000,
736         (0x0e00 << 16) | (0xc2b8 >> 2),
737         0x00000000,
738         (0x0e00 << 16) | (0xc2bc >> 2),
739         0x00000000,
740         (0x0e00 << 16) | (0xc2c0 >> 2),
741         0x00000000,
742         (0x0e00 << 16) | (0x8228 >> 2),
743         0x00000000,
744         (0x0e00 << 16) | (0x829c >> 2),
745         0x00000000,
746         (0x0e00 << 16) | (0x869c >> 2),
747         0x00000000,
748         (0x0600 << 16) | (0x98f4 >> 2),
749         0x00000000,
750         (0x0e00 << 16) | (0x98f8 >> 2),
751         0x00000000,
752         (0x0e00 << 16) | (0x9900 >> 2),
753         0x00000000,
754         (0x0e00 << 16) | (0xc260 >> 2),
755         0x00000000,
756         (0x0e00 << 16) | (0x90e8 >> 2),
757         0x00000000,
758         (0x0e00 << 16) | (0x3c000 >> 2),
759         0x00000000,
760         (0x0e00 << 16) | (0x3c00c >> 2),
761         0x00000000,
762         (0x0e00 << 16) | (0x8c1c >> 2),
763         0x00000000,
764         (0x0e00 << 16) | (0x9700 >> 2),
765         0x00000000,
766         (0x0e00 << 16) | (0xcd20 >> 2),
767         0x00000000,
768         (0x4e00 << 16) | (0xcd20 >> 2),
769         0x00000000,
770         (0x5e00 << 16) | (0xcd20 >> 2),
771         0x00000000,
772         (0x6e00 << 16) | (0xcd20 >> 2),
773         0x00000000,
774         (0x7e00 << 16) | (0xcd20 >> 2),
775         0x00000000,
776         (0x0e00 << 16) | (0x89bc >> 2),
777         0x00000000,
778         (0x0e00 << 16) | (0x8900 >> 2),
779         0x00000000,
780         0x3,
781         (0x0e00 << 16) | (0xc130 >> 2),
782         0x00000000,
783         (0x0e00 << 16) | (0xc134 >> 2),
784         0x00000000,
785         (0x0e00 << 16) | (0xc1fc >> 2),
786         0x00000000,
787         (0x0e00 << 16) | (0xc208 >> 2),
788         0x00000000,
789         (0x0e00 << 16) | (0xc264 >> 2),
790         0x00000000,
791         (0x0e00 << 16) | (0xc268 >> 2),
792         0x00000000,
793         (0x0e00 << 16) | (0xc26c >> 2),
794         0x00000000,
795         (0x0e00 << 16) | (0xc270 >> 2),
796         0x00000000,
797         (0x0e00 << 16) | (0xc274 >> 2),
798         0x00000000,
799         (0x0e00 << 16) | (0xc28c >> 2),
800         0x00000000,
801         (0x0e00 << 16) | (0xc290 >> 2),
802         0x00000000,
803         (0x0e00 << 16) | (0xc294 >> 2),
804         0x00000000,
805         (0x0e00 << 16) | (0xc298 >> 2),
806         0x00000000,
807         (0x0e00 << 16) | (0xc2a0 >> 2),
808         0x00000000,
809         (0x0e00 << 16) | (0xc2a4 >> 2),
810         0x00000000,
811         (0x0e00 << 16) | (0xc2a8 >> 2),
812         0x00000000,
813         (0x0e00 << 16) | (0xc2ac >> 2),
814         0x00000000,
815         (0x0e00 << 16) | (0x301d0 >> 2),
816         0x00000000,
817         (0x0e00 << 16) | (0x30238 >> 2),
818         0x00000000,
819         (0x0e00 << 16) | (0x30250 >> 2),
820         0x00000000,
821         (0x0e00 << 16) | (0x30254 >> 2),
822         0x00000000,
823         (0x0e00 << 16) | (0x30258 >> 2),
824         0x00000000,
825         (0x0e00 << 16) | (0x3025c >> 2),
826         0x00000000,
827         (0x4e00 << 16) | (0xc900 >> 2),
828         0x00000000,
829         (0x5e00 << 16) | (0xc900 >> 2),
830         0x00000000,
831         (0x6e00 << 16) | (0xc900 >> 2),
832         0x00000000,
833         (0x7e00 << 16) | (0xc900 >> 2),
834         0x00000000,
835         (0x4e00 << 16) | (0xc904 >> 2),
836         0x00000000,
837         (0x5e00 << 16) | (0xc904 >> 2),
838         0x00000000,
839         (0x6e00 << 16) | (0xc904 >> 2),
840         0x00000000,
841         (0x7e00 << 16) | (0xc904 >> 2),
842         0x00000000,
843         (0x4e00 << 16) | (0xc908 >> 2),
844         0x00000000,
845         (0x5e00 << 16) | (0xc908 >> 2),
846         0x00000000,
847         (0x6e00 << 16) | (0xc908 >> 2),
848         0x00000000,
849         (0x7e00 << 16) | (0xc908 >> 2),
850         0x00000000,
851         (0x4e00 << 16) | (0xc90c >> 2),
852         0x00000000,
853         (0x5e00 << 16) | (0xc90c >> 2),
854         0x00000000,
855         (0x6e00 << 16) | (0xc90c >> 2),
856         0x00000000,
857         (0x7e00 << 16) | (0xc90c >> 2),
858         0x00000000,
859         (0x4e00 << 16) | (0xc910 >> 2),
860         0x00000000,
861         (0x5e00 << 16) | (0xc910 >> 2),
862         0x00000000,
863         (0x6e00 << 16) | (0xc910 >> 2),
864         0x00000000,
865         (0x7e00 << 16) | (0xc910 >> 2),
866         0x00000000,
867         (0x0e00 << 16) | (0xc99c >> 2),
868         0x00000000,
869         (0x0e00 << 16) | (0x9834 >> 2),
870         0x00000000,
871         (0x0000 << 16) | (0x30f00 >> 2),
872         0x00000000,
873         (0x0000 << 16) | (0x30f04 >> 2),
874         0x00000000,
875         (0x0000 << 16) | (0x30f08 >> 2),
876         0x00000000,
877         (0x0000 << 16) | (0x30f0c >> 2),
878         0x00000000,
879         (0x0600 << 16) | (0x9b7c >> 2),
880         0x00000000,
881         (0x0e00 << 16) | (0x8a14 >> 2),
882         0x00000000,
883         (0x0e00 << 16) | (0x8a18 >> 2),
884         0x00000000,
885         (0x0600 << 16) | (0x30a00 >> 2),
886         0x00000000,
887         (0x0e00 << 16) | (0x8bf0 >> 2),
888         0x00000000,
889         (0x0e00 << 16) | (0x8bcc >> 2),
890         0x00000000,
891         (0x0e00 << 16) | (0x8b24 >> 2),
892         0x00000000,
893         (0x0e00 << 16) | (0x30a04 >> 2),
894         0x00000000,
895         (0x0600 << 16) | (0x30a10 >> 2),
896         0x00000000,
897         (0x0600 << 16) | (0x30a14 >> 2),
898         0x00000000,
899         (0x0600 << 16) | (0x30a18 >> 2),
900         0x00000000,
901         (0x0600 << 16) | (0x30a2c >> 2),
902         0x00000000,
903         (0x0e00 << 16) | (0xc700 >> 2),
904         0x00000000,
905         (0x0e00 << 16) | (0xc704 >> 2),
906         0x00000000,
907         (0x0e00 << 16) | (0xc708 >> 2),
908         0x00000000,
909         (0x0e00 << 16) | (0xc768 >> 2),
910         0x00000000,
911         (0x0400 << 16) | (0xc770 >> 2),
912         0x00000000,
913         (0x0400 << 16) | (0xc774 >> 2),
914         0x00000000,
915         (0x0400 << 16) | (0xc798 >> 2),
916         0x00000000,
917         (0x0400 << 16) | (0xc79c >> 2),
918         0x00000000,
919         (0x0e00 << 16) | (0x9100 >> 2),
920         0x00000000,
921         (0x0e00 << 16) | (0x3c010 >> 2),
922         0x00000000,
923         (0x0e00 << 16) | (0x8c00 >> 2),
924         0x00000000,
925         (0x0e00 << 16) | (0x8c04 >> 2),
926         0x00000000,
927         (0x0e00 << 16) | (0x8c20 >> 2),
928         0x00000000,
929         (0x0e00 << 16) | (0x8c38 >> 2),
930         0x00000000,
931         (0x0e00 << 16) | (0x8c3c >> 2),
932         0x00000000,
933         (0x0e00 << 16) | (0xae00 >> 2),
934         0x00000000,
935         (0x0e00 << 16) | (0x9604 >> 2),
936         0x00000000,
937         (0x0e00 << 16) | (0xac08 >> 2),
938         0x00000000,
939         (0x0e00 << 16) | (0xac0c >> 2),
940         0x00000000,
941         (0x0e00 << 16) | (0xac10 >> 2),
942         0x00000000,
943         (0x0e00 << 16) | (0xac14 >> 2),
944         0x00000000,
945         (0x0e00 << 16) | (0xac58 >> 2),
946         0x00000000,
947         (0x0e00 << 16) | (0xac68 >> 2),
948         0x00000000,
949         (0x0e00 << 16) | (0xac6c >> 2),
950         0x00000000,
951         (0x0e00 << 16) | (0xac70 >> 2),
952         0x00000000,
953         (0x0e00 << 16) | (0xac74 >> 2),
954         0x00000000,
955         (0x0e00 << 16) | (0xac78 >> 2),
956         0x00000000,
957         (0x0e00 << 16) | (0xac7c >> 2),
958         0x00000000,
959         (0x0e00 << 16) | (0xac80 >> 2),
960         0x00000000,
961         (0x0e00 << 16) | (0xac84 >> 2),
962         0x00000000,
963         (0x0e00 << 16) | (0xac88 >> 2),
964         0x00000000,
965         (0x0e00 << 16) | (0xac8c >> 2),
966         0x00000000,
967         (0x0e00 << 16) | (0x970c >> 2),
968         0x00000000,
969         (0x0e00 << 16) | (0x9714 >> 2),
970         0x00000000,
971         (0x0e00 << 16) | (0x9718 >> 2),
972         0x00000000,
973         (0x0e00 << 16) | (0x971c >> 2),
974         0x00000000,
975         (0x0e00 << 16) | (0x31068 >> 2),
976         0x00000000,
977         (0x4e00 << 16) | (0x31068 >> 2),
978         0x00000000,
979         (0x5e00 << 16) | (0x31068 >> 2),
980         0x00000000,
981         (0x6e00 << 16) | (0x31068 >> 2),
982         0x00000000,
983         (0x7e00 << 16) | (0x31068 >> 2),
984         0x00000000,
985         (0x0e00 << 16) | (0xcd10 >> 2),
986         0x00000000,
987         (0x0e00 << 16) | (0xcd14 >> 2),
988         0x00000000,
989         (0x0e00 << 16) | (0x88b0 >> 2),
990         0x00000000,
991         (0x0e00 << 16) | (0x88b4 >> 2),
992         0x00000000,
993         (0x0e00 << 16) | (0x88b8 >> 2),
994         0x00000000,
995         (0x0e00 << 16) | (0x88bc >> 2),
996         0x00000000,
997         (0x0400 << 16) | (0x89c0 >> 2),
998         0x00000000,
999         (0x0e00 << 16) | (0x88c4 >> 2),
1000         0x00000000,
1001         (0x0e00 << 16) | (0x88c8 >> 2),
1002         0x00000000,
1003         (0x0e00 << 16) | (0x88d0 >> 2),
1004         0x00000000,
1005         (0x0e00 << 16) | (0x88d4 >> 2),
1006         0x00000000,
1007         (0x0e00 << 16) | (0x88d8 >> 2),
1008         0x00000000,
1009         (0x0e00 << 16) | (0x8980 >> 2),
1010         0x00000000,
1011         (0x0e00 << 16) | (0x30938 >> 2),
1012         0x00000000,
1013         (0x0e00 << 16) | (0x3093c >> 2),
1014         0x00000000,
1015         (0x0e00 << 16) | (0x30940 >> 2),
1016         0x00000000,
1017         (0x0e00 << 16) | (0x89a0 >> 2),
1018         0x00000000,
1019         (0x0e00 << 16) | (0x30900 >> 2),
1020         0x00000000,
1021         (0x0e00 << 16) | (0x30904 >> 2),
1022         0x00000000,
1023         (0x0e00 << 16) | (0x89b4 >> 2),
1024         0x00000000,
1025         (0x0e00 << 16) | (0x3e1fc >> 2),
1026         0x00000000,
1027         (0x0e00 << 16) | (0x3c210 >> 2),
1028         0x00000000,
1029         (0x0e00 << 16) | (0x3c214 >> 2),
1030         0x00000000,
1031         (0x0e00 << 16) | (0x3c218 >> 2),
1032         0x00000000,
1033         (0x0e00 << 16) | (0x8904 >> 2),
1034         0x00000000,
1035         0x5,
1036         (0x0e00 << 16) | (0x8c28 >> 2),
1037         (0x0e00 << 16) | (0x8c2c >> 2),
1038         (0x0e00 << 16) | (0x8c30 >> 2),
1039         (0x0e00 << 16) | (0x8c34 >> 2),
1040         (0x0e00 << 16) | (0x9600 >> 2),
1041 };
1042
/*
 * "Golden" (AMD-recommended) SPM register settings for Bonaire.
 * Triples of {register offset, AND mask, OR value} consumed by
 * radeon_program_register_sequence() in cik_init_golden_registers()
 * (triple layout inferred from that call site — confirm there).
 */
static const u32 bonaire_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
1047
/*
 * Golden settings common to the Bonaire family: {offset, mask, value}
 * triples applied by radeon_program_register_sequence().
 */
static const u32 bonaire_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};
1055
/*
 * Per-ASIC golden register settings for Bonaire: {offset, mask, value}
 * triples applied by radeon_program_register_sequence() during
 * cik_init_golden_registers().
 */
static const u32 bonaire_golden_registers[] =
{
        0x3354, 0x00000333, 0x00000333,
        0x3350, 0x000c0fc0, 0x00040200,
        0x9a10, 0x00010000, 0x00058208,
        0x3c000, 0xffff1fff, 0x00140000,
        0x3c200, 0xfdfc0fff, 0x00000100,
        0x3c234, 0x40000000, 0x40000200,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x350c, 0x00810000, 0x408af000,
        0x7030, 0x31000111, 0x00000011,
        0x2f48, 0x73773777, 0x12010001,
        0x220c, 0x00007fb6, 0x0021a1b1,
        0x2210, 0x00007fb6, 0x002021b1,
        0x2180, 0x00007fb6, 0x00002191,
        0x2218, 0x00007fb6, 0x002121b1,
        0x221c, 0x00007fb6, 0x002021b1,
        0x21dc, 0x00007fb6, 0x00002191,
        0x21e0, 0x00007fb6, 0x00002191,
        0x3628, 0x0000003f, 0x0000000a,
        0x362c, 0x0000003f, 0x0000000a,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000003f, 0x00000007,
        0x8bf0, 0x00002001, 0x00000001,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0x3e78, 0x00000001, 0x00000002,
        0x9100, 0x03000000, 0x0362c688,
        0x8c00, 0x000000ff, 0x00000001,
        0xe40, 0x00001fff, 0x00001fff,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x000000f3,
        0xac0c, 0xffffffff, 0x00001032
};
1100
1101 static const u32 bonaire_mgcg_cgcg_init[] =
1102 {
1103         0xc420, 0xffffffff, 0xfffffffc,
1104         0x30800, 0xffffffff, 0xe0000000,
1105         0x3c2a0, 0xffffffff, 0x00000100,
1106         0x3c208, 0xffffffff, 0x00000100,
1107         0x3c2c0, 0xffffffff, 0xc0000100,
1108         0x3c2c8, 0xffffffff, 0xc0000100,
1109         0x3c2c4, 0xffffffff, 0xc0000100,
1110         0x55e4, 0xffffffff, 0x00600100,
1111         0x3c280, 0xffffffff, 0x00000100,
1112         0x3c214, 0xffffffff, 0x06000100,
1113         0x3c220, 0xffffffff, 0x00000100,
1114         0x3c218, 0xffffffff, 0x06000100,
1115         0x3c204, 0xffffffff, 0x00000100,
1116         0x3c2e0, 0xffffffff, 0x00000100,
1117         0x3c224, 0xffffffff, 0x00000100,
1118         0x3c200, 0xffffffff, 0x00000100,
1119         0x3c230, 0xffffffff, 0x00000100,
1120         0x3c234, 0xffffffff, 0x00000100,
1121         0x3c250, 0xffffffff, 0x00000100,
1122         0x3c254, 0xffffffff, 0x00000100,
1123         0x3c258, 0xffffffff, 0x00000100,
1124         0x3c25c, 0xffffffff, 0x00000100,
1125         0x3c260, 0xffffffff, 0x00000100,
1126         0x3c27c, 0xffffffff, 0x00000100,
1127         0x3c278, 0xffffffff, 0x00000100,
1128         0x3c210, 0xffffffff, 0x06000100,
1129         0x3c290, 0xffffffff, 0x00000100,
1130         0x3c274, 0xffffffff, 0x00000100,
1131         0x3c2b4, 0xffffffff, 0x00000100,
1132         0x3c2b0, 0xffffffff, 0x00000100,
1133         0x3c270, 0xffffffff, 0x00000100,
1134         0x30800, 0xffffffff, 0xe0000000,
1135         0x3c020, 0xffffffff, 0x00010000,
1136         0x3c024, 0xffffffff, 0x00030002,
1137         0x3c028, 0xffffffff, 0x00040007,
1138         0x3c02c, 0xffffffff, 0x00060005,
1139         0x3c030, 0xffffffff, 0x00090008,
1140         0x3c034, 0xffffffff, 0x00010000,
1141         0x3c038, 0xffffffff, 0x00030002,
1142         0x3c03c, 0xffffffff, 0x00040007,
1143         0x3c040, 0xffffffff, 0x00060005,
1144         0x3c044, 0xffffffff, 0x00090008,
1145         0x3c048, 0xffffffff, 0x00010000,
1146         0x3c04c, 0xffffffff, 0x00030002,
1147         0x3c050, 0xffffffff, 0x00040007,
1148         0x3c054, 0xffffffff, 0x00060005,
1149         0x3c058, 0xffffffff, 0x00090008,
1150         0x3c05c, 0xffffffff, 0x00010000,
1151         0x3c060, 0xffffffff, 0x00030002,
1152         0x3c064, 0xffffffff, 0x00040007,
1153         0x3c068, 0xffffffff, 0x00060005,
1154         0x3c06c, 0xffffffff, 0x00090008,
1155         0x3c070, 0xffffffff, 0x00010000,
1156         0x3c074, 0xffffffff, 0x00030002,
1157         0x3c078, 0xffffffff, 0x00040007,
1158         0x3c07c, 0xffffffff, 0x00060005,
1159         0x3c080, 0xffffffff, 0x00090008,
1160         0x3c084, 0xffffffff, 0x00010000,
1161         0x3c088, 0xffffffff, 0x00030002,
1162         0x3c08c, 0xffffffff, 0x00040007,
1163         0x3c090, 0xffffffff, 0x00060005,
1164         0x3c094, 0xffffffff, 0x00090008,
1165         0x3c098, 0xffffffff, 0x00010000,
1166         0x3c09c, 0xffffffff, 0x00030002,
1167         0x3c0a0, 0xffffffff, 0x00040007,
1168         0x3c0a4, 0xffffffff, 0x00060005,
1169         0x3c0a8, 0xffffffff, 0x00090008,
1170         0x3c000, 0xffffffff, 0x96e00200,
1171         0x8708, 0xffffffff, 0x00900100,
1172         0xc424, 0xffffffff, 0x0020003f,
1173         0x38, 0xffffffff, 0x0140001c,
1174         0x3c, 0x000f0000, 0x000f0000,
1175         0x220, 0xffffffff, 0xC060000C,
1176         0x224, 0xc0000fff, 0x00000100,
1177         0xf90, 0xffffffff, 0x00000100,
1178         0xf98, 0x00000101, 0x00000000,
1179         0x20a8, 0xffffffff, 0x00000104,
1180         0x55e4, 0xff000fff, 0x00000100,
1181         0x30cc, 0xc0000fff, 0x00000104,
1182         0xc1e4, 0x00000001, 0x00000001,
1183         0xd00c, 0xff000ff0, 0x00000100,
1184         0xd80c, 0xff000ff0, 0x00000100
1185 };
1186
/*
 * Golden SPM register settings for Spectre (Kaveri GFX):
 * {offset, mask, value} triples for radeon_program_register_sequence().
 */
static const u32 spectre_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
1191
/*
 * Golden settings common to Spectre (Kaveri): {offset, mask, value}
 * triples applied by radeon_program_register_sequence().
 */
static const u32 spectre_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};
1199
/*
 * Per-ASIC golden register settings for Spectre (Kaveri):
 * {offset, mask, value} triples applied by
 * radeon_program_register_sequence() during cik_init_golden_registers().
 */
static const u32 spectre_golden_registers[] =
{
        0x3c000, 0xffff1fff, 0x96940200,
        0x3c00c, 0xffff0001, 0xff000000,
        0x3c200, 0xfffc0fff, 0x00000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0xfffffffc, 0x00020200,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x2f48, 0x73773777, 0x12010001,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x28350, 0x3f3f3fff, 0x00000082,
        0x28354, 0x0000003f, 0x00000000,
        0x3e78, 0x00000001, 0x00000002,
        0x913c, 0xffff03df, 0x00000004,
        0xc768, 0x00000008, 0x00000008,
        0x8c00, 0x000008ff, 0x00000800,
        0x9508, 0x00010000, 0x00010000,
        0xac0c, 0xffffffff, 0x54763210,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x30934, 0xffffffff, 0x00000001
};
1228
1229 static const u32 spectre_mgcg_cgcg_init[] =
1230 {
1231         0xc420, 0xffffffff, 0xfffffffc,
1232         0x30800, 0xffffffff, 0xe0000000,
1233         0x3c2a0, 0xffffffff, 0x00000100,
1234         0x3c208, 0xffffffff, 0x00000100,
1235         0x3c2c0, 0xffffffff, 0x00000100,
1236         0x3c2c8, 0xffffffff, 0x00000100,
1237         0x3c2c4, 0xffffffff, 0x00000100,
1238         0x55e4, 0xffffffff, 0x00600100,
1239         0x3c280, 0xffffffff, 0x00000100,
1240         0x3c214, 0xffffffff, 0x06000100,
1241         0x3c220, 0xffffffff, 0x00000100,
1242         0x3c218, 0xffffffff, 0x06000100,
1243         0x3c204, 0xffffffff, 0x00000100,
1244         0x3c2e0, 0xffffffff, 0x00000100,
1245         0x3c224, 0xffffffff, 0x00000100,
1246         0x3c200, 0xffffffff, 0x00000100,
1247         0x3c230, 0xffffffff, 0x00000100,
1248         0x3c234, 0xffffffff, 0x00000100,
1249         0x3c250, 0xffffffff, 0x00000100,
1250         0x3c254, 0xffffffff, 0x00000100,
1251         0x3c258, 0xffffffff, 0x00000100,
1252         0x3c25c, 0xffffffff, 0x00000100,
1253         0x3c260, 0xffffffff, 0x00000100,
1254         0x3c27c, 0xffffffff, 0x00000100,
1255         0x3c278, 0xffffffff, 0x00000100,
1256         0x3c210, 0xffffffff, 0x06000100,
1257         0x3c290, 0xffffffff, 0x00000100,
1258         0x3c274, 0xffffffff, 0x00000100,
1259         0x3c2b4, 0xffffffff, 0x00000100,
1260         0x3c2b0, 0xffffffff, 0x00000100,
1261         0x3c270, 0xffffffff, 0x00000100,
1262         0x30800, 0xffffffff, 0xe0000000,
1263         0x3c020, 0xffffffff, 0x00010000,
1264         0x3c024, 0xffffffff, 0x00030002,
1265         0x3c028, 0xffffffff, 0x00040007,
1266         0x3c02c, 0xffffffff, 0x00060005,
1267         0x3c030, 0xffffffff, 0x00090008,
1268         0x3c034, 0xffffffff, 0x00010000,
1269         0x3c038, 0xffffffff, 0x00030002,
1270         0x3c03c, 0xffffffff, 0x00040007,
1271         0x3c040, 0xffffffff, 0x00060005,
1272         0x3c044, 0xffffffff, 0x00090008,
1273         0x3c048, 0xffffffff, 0x00010000,
1274         0x3c04c, 0xffffffff, 0x00030002,
1275         0x3c050, 0xffffffff, 0x00040007,
1276         0x3c054, 0xffffffff, 0x00060005,
1277         0x3c058, 0xffffffff, 0x00090008,
1278         0x3c05c, 0xffffffff, 0x00010000,
1279         0x3c060, 0xffffffff, 0x00030002,
1280         0x3c064, 0xffffffff, 0x00040007,
1281         0x3c068, 0xffffffff, 0x00060005,
1282         0x3c06c, 0xffffffff, 0x00090008,
1283         0x3c070, 0xffffffff, 0x00010000,
1284         0x3c074, 0xffffffff, 0x00030002,
1285         0x3c078, 0xffffffff, 0x00040007,
1286         0x3c07c, 0xffffffff, 0x00060005,
1287         0x3c080, 0xffffffff, 0x00090008,
1288         0x3c084, 0xffffffff, 0x00010000,
1289         0x3c088, 0xffffffff, 0x00030002,
1290         0x3c08c, 0xffffffff, 0x00040007,
1291         0x3c090, 0xffffffff, 0x00060005,
1292         0x3c094, 0xffffffff, 0x00090008,
1293         0x3c098, 0xffffffff, 0x00010000,
1294         0x3c09c, 0xffffffff, 0x00030002,
1295         0x3c0a0, 0xffffffff, 0x00040007,
1296         0x3c0a4, 0xffffffff, 0x00060005,
1297         0x3c0a8, 0xffffffff, 0x00090008,
1298         0x3c0ac, 0xffffffff, 0x00010000,
1299         0x3c0b0, 0xffffffff, 0x00030002,
1300         0x3c0b4, 0xffffffff, 0x00040007,
1301         0x3c0b8, 0xffffffff, 0x00060005,
1302         0x3c0bc, 0xffffffff, 0x00090008,
1303         0x3c000, 0xffffffff, 0x96e00200,
1304         0x8708, 0xffffffff, 0x00900100,
1305         0xc424, 0xffffffff, 0x0020003f,
1306         0x38, 0xffffffff, 0x0140001c,
1307         0x3c, 0x000f0000, 0x000f0000,
1308         0x220, 0xffffffff, 0xC060000C,
1309         0x224, 0xc0000fff, 0x00000100,
1310         0xf90, 0xffffffff, 0x00000100,
1311         0xf98, 0x00000101, 0x00000000,
1312         0x20a8, 0xffffffff, 0x00000104,
1313         0x55e4, 0xff000fff, 0x00000100,
1314         0x30cc, 0xc0000fff, 0x00000104,
1315         0xc1e4, 0x00000001, 0x00000001,
1316         0xd00c, 0xff000ff0, 0x00000100,
1317         0xd80c, 0xff000ff0, 0x00000100
1318 };
1319
/*
 * Golden SPM register settings for Kalindi (Kabini):
 * {offset, mask, value} triples for radeon_program_register_sequence().
 */
static const u32 kalindi_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
1324
/*
 * Golden settings common to Kalindi (Kabini/Mullins):
 * {offset, mask, value} triples applied by
 * radeon_program_register_sequence().
 */
static const u32 kalindi_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};
1332
/*
 * Per-ASIC golden register settings for Kalindi (Kabini):
 * {offset, mask, value} triples applied by
 * radeon_program_register_sequence() during cik_init_golden_registers().
 */
static const u32 kalindi_golden_registers[] =
{
        0x3c000, 0xffffdfff, 0x6e944040,
        0x55e4, 0xff607fff, 0xfc000100,
        0x3c220, 0xff000fff, 0x00000100,
        0x3c224, 0xff000fff, 0x00000100,
        0x3c200, 0xfffc0fff, 0x00000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x98fc, 0xffffffff, 0x00000010,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x8030, 0x00001f0f, 0x0000100a,
        0x2f48, 0x73773777, 0x12010001,
        0x2408, 0x000fffff, 0x000c007f,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0x3fff3fff, 0x00ffcfff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0x3e78, 0x00000001, 0x00000002,
        0xc768, 0x00000008, 0x00000008,
        0x8c00, 0x000000ff, 0x00000003,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x88c4, 0x001f3ae3, 0x00000082,
        0x88d4, 0x0000001f, 0x00000010,
        0x30934, 0xffffffff, 0x00000000
};
1366
1367 static const u32 kalindi_mgcg_cgcg_init[] =
1368 {
1369         0xc420, 0xffffffff, 0xfffffffc,
1370         0x30800, 0xffffffff, 0xe0000000,
1371         0x3c2a0, 0xffffffff, 0x00000100,
1372         0x3c208, 0xffffffff, 0x00000100,
1373         0x3c2c0, 0xffffffff, 0x00000100,
1374         0x3c2c8, 0xffffffff, 0x00000100,
1375         0x3c2c4, 0xffffffff, 0x00000100,
1376         0x55e4, 0xffffffff, 0x00600100,
1377         0x3c280, 0xffffffff, 0x00000100,
1378         0x3c214, 0xffffffff, 0x06000100,
1379         0x3c220, 0xffffffff, 0x00000100,
1380         0x3c218, 0xffffffff, 0x06000100,
1381         0x3c204, 0xffffffff, 0x00000100,
1382         0x3c2e0, 0xffffffff, 0x00000100,
1383         0x3c224, 0xffffffff, 0x00000100,
1384         0x3c200, 0xffffffff, 0x00000100,
1385         0x3c230, 0xffffffff, 0x00000100,
1386         0x3c234, 0xffffffff, 0x00000100,
1387         0x3c250, 0xffffffff, 0x00000100,
1388         0x3c254, 0xffffffff, 0x00000100,
1389         0x3c258, 0xffffffff, 0x00000100,
1390         0x3c25c, 0xffffffff, 0x00000100,
1391         0x3c260, 0xffffffff, 0x00000100,
1392         0x3c27c, 0xffffffff, 0x00000100,
1393         0x3c278, 0xffffffff, 0x00000100,
1394         0x3c210, 0xffffffff, 0x06000100,
1395         0x3c290, 0xffffffff, 0x00000100,
1396         0x3c274, 0xffffffff, 0x00000100,
1397         0x3c2b4, 0xffffffff, 0x00000100,
1398         0x3c2b0, 0xffffffff, 0x00000100,
1399         0x3c270, 0xffffffff, 0x00000100,
1400         0x30800, 0xffffffff, 0xe0000000,
1401         0x3c020, 0xffffffff, 0x00010000,
1402         0x3c024, 0xffffffff, 0x00030002,
1403         0x3c028, 0xffffffff, 0x00040007,
1404         0x3c02c, 0xffffffff, 0x00060005,
1405         0x3c030, 0xffffffff, 0x00090008,
1406         0x3c034, 0xffffffff, 0x00010000,
1407         0x3c038, 0xffffffff, 0x00030002,
1408         0x3c03c, 0xffffffff, 0x00040007,
1409         0x3c040, 0xffffffff, 0x00060005,
1410         0x3c044, 0xffffffff, 0x00090008,
1411         0x3c000, 0xffffffff, 0x96e00200,
1412         0x8708, 0xffffffff, 0x00900100,
1413         0xc424, 0xffffffff, 0x0020003f,
1414         0x38, 0xffffffff, 0x0140001c,
1415         0x3c, 0x000f0000, 0x000f0000,
1416         0x220, 0xffffffff, 0xC060000C,
1417         0x224, 0xc0000fff, 0x00000100,
1418         0x20a8, 0xffffffff, 0x00000104,
1419         0x55e4, 0xff000fff, 0x00000100,
1420         0x30cc, 0xc0000fff, 0x00000104,
1421         0xc1e4, 0x00000001, 0x00000001,
1422         0xd00c, 0xff000ff0, 0x00000100,
1423         0xd80c, 0xff000ff0, 0x00000100
1424 };
1425
/*
 * Golden SPM register settings for Hawaii:
 * {offset, mask, value} triples for radeon_program_register_sequence().
 */
static const u32 hawaii_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
1430
/*
 * Golden settings common to Hawaii: {offset, mask, value} triples
 * applied by radeon_program_register_sequence().
 */
static const u32 hawaii_golden_common_registers[] =
{
        0x30800, 0xffffffff, 0xe0000000,
        0x28350, 0xffffffff, 0x3a00161a,
        0x28354, 0xffffffff, 0x0000002e,
        0x9a10, 0xffffffff, 0x00018208,
        0x98f8, 0xffffffff, 0x12011003
};
1439
/*
 * Per-ASIC golden register settings for Hawaii: {offset, mask, value}
 * triples applied by radeon_program_register_sequence() during
 * cik_init_golden_registers().
 */
static const u32 hawaii_golden_registers[] =
{
        0x3354, 0x00000333, 0x00000333,
        0x9a10, 0x00010000, 0x00058208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x350c, 0x00810000, 0x408af000,
        0x7030, 0x31000111, 0x00000011,
        0x2f48, 0x73773777, 0x12010001,
        0x2120, 0x0000007f, 0x0000001b,
        0x21dc, 0x00007fb6, 0x00002191,
        0x3628, 0x0000003f, 0x0000000a,
        0x362c, 0x0000003f, 0x0000000a,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8bf0, 0x00002001, 0x00000001,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x3e78, 0x00000001, 0x00000002,
        0xc768, 0x00000008, 0x00000008,
        0xc770, 0x00000f00, 0x00000800,
        0xc774, 0x00000f00, 0x00000800,
        0xc798, 0x00ffffff, 0x00ff7fbf,
        0xc79c, 0x00ffffff, 0x00ff7faf,
        0x8c00, 0x000000ff, 0x00000800,
        0xe40, 0x00001fff, 0x00001fff,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xae00, 0x00100000, 0x000ff07c,
        0xac14, 0x000003ff, 0x0000000f,
        0xac10, 0xffffffff, 0x7564fdec,
        0xac0c, 0xffffffff, 0x3120b9a8,
        0xac08, 0x20000000, 0x0f9c0000
};
1479
/*
 * MGCG/CGCG clock-gating init sequence for Hawaii:
 * {offset, mask, value} triples applied by
 * radeon_program_register_sequence().
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffd,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00200100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c0ac, 0xffffffff, 0x00010000,
        0x3c0b0, 0xffffffff, 0x00030002,
        0x3c0b4, 0xffffffff, 0x00040007,
        0x3c0b8, 0xffffffff, 0x00060005,
        0x3c0bc, 0xffffffff, 0x00090008,
        0x3c0c0, 0xffffffff, 0x00010000,
        0x3c0c4, 0xffffffff, 0x00030002,
        0x3c0c8, 0xffffffff, 0x00040007,
        0x3c0cc, 0xffffffff, 0x00060005,
        0x3c0d0, 0xffffffff, 0x00090008,
        0x3c0d4, 0xffffffff, 0x00010000,
        0x3c0d8, 0xffffffff, 0x00030002,
        0x3c0dc, 0xffffffff, 0x00040007,
        0x3c0e0, 0xffffffff, 0x00060005,
        0x3c0e4, 0xffffffff, 0x00090008,
        0x3c0e8, 0xffffffff, 0x00010000,
        0x3c0ec, 0xffffffff, 0x00030002,
        0x3c0f0, 0xffffffff, 0x00040007,
        0x3c0f4, 0xffffffff, 0x00060005,
        0x3c0f8, 0xffffffff, 0x00090008,
        0xc318, 0xffffffff, 0x00020200,
        0x3350, 0xffffffff, 0x00000200,
        0x15c0, 0xffffffff, 0x00000400,
        0x55e8, 0xffffffff, 0x00000000,
        0x2f50, 0xffffffff, 0x00000902,
        0x3c000, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xc060000c,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};
1590
1591 static const u32 godavari_golden_registers[] =
1592 {
1593         0x55e4, 0xff607fff, 0xfc000100,
1594         0x6ed8, 0x00010101, 0x00010000,
1595         0x9830, 0xffffffff, 0x00000000,
1596         0x98302, 0xf00fffff, 0x00000400,
1597         0x6130, 0xffffffff, 0x00010000,
1598         0x5bb0, 0x000000f0, 0x00000070,
1599         0x5bc0, 0xf0311fff, 0x80300000,
1600         0x98f8, 0x73773777, 0x12010001,
1601         0x98fc, 0xffffffff, 0x00000010,
1602         0x8030, 0x00001f0f, 0x0000100a,
1603         0x2f48, 0x73773777, 0x12010001,
1604         0x2408, 0x000fffff, 0x000c007f,
1605         0x8a14, 0xf000003f, 0x00000007,
1606         0x8b24, 0xffffffff, 0x00ff0fff,
1607         0x30a04, 0x0000ff0f, 0x00000000,
1608         0x28a4c, 0x07ffffff, 0x06000000,
1609         0x4d8, 0x00000fff, 0x00000100,
1610         0xd014, 0x00010000, 0x00810001,
1611         0xd814, 0x00010000, 0x00810001,
1612         0x3e78, 0x00000001, 0x00000002,
1613         0xc768, 0x00000008, 0x00000008,
1614         0xc770, 0x00000f00, 0x00000800,
1615         0xc774, 0x00000f00, 0x00000800,
1616         0xc798, 0x00ffffff, 0x00ff7fbf,
1617         0xc79c, 0x00ffffff, 0x00ff7faf,
1618         0x8c00, 0x000000ff, 0x00000001,
1619         0x214f8, 0x01ff01ff, 0x00000002,
1620         0x21498, 0x007ff800, 0x00200000,
1621         0x2015c, 0xffffffff, 0x00000f40,
1622         0x88c4, 0x001f3ae3, 0x00000082,
1623         0x88d4, 0x0000001f, 0x00000010,
1624         0x30934, 0xffffffff, 0x00000000
1625 };
1626
1627
1628 static void cik_init_golden_registers(struct radeon_device *rdev)
1629 {
1630         /* Some of the registers might be dependent on GRBM_GFX_INDEX */
1631         mutex_lock(&rdev->grbm_idx_mutex);
1632         switch (rdev->family) {
1633         case CHIP_BONAIRE:
1634                 radeon_program_register_sequence(rdev,
1635                                                  bonaire_mgcg_cgcg_init,
1636                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1637                 radeon_program_register_sequence(rdev,
1638                                                  bonaire_golden_registers,
1639                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1640                 radeon_program_register_sequence(rdev,
1641                                                  bonaire_golden_common_registers,
1642                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1643                 radeon_program_register_sequence(rdev,
1644                                                  bonaire_golden_spm_registers,
1645                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1646                 break;
1647         case CHIP_KABINI:
1648                 radeon_program_register_sequence(rdev,
1649                                                  kalindi_mgcg_cgcg_init,
1650                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1651                 radeon_program_register_sequence(rdev,
1652                                                  kalindi_golden_registers,
1653                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1654                 radeon_program_register_sequence(rdev,
1655                                                  kalindi_golden_common_registers,
1656                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1657                 radeon_program_register_sequence(rdev,
1658                                                  kalindi_golden_spm_registers,
1659                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1660                 break;
1661         case CHIP_MULLINS:
1662                 radeon_program_register_sequence(rdev,
1663                                                  kalindi_mgcg_cgcg_init,
1664                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1665                 radeon_program_register_sequence(rdev,
1666                                                  godavari_golden_registers,
1667                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1668                 radeon_program_register_sequence(rdev,
1669                                                  kalindi_golden_common_registers,
1670                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1671                 radeon_program_register_sequence(rdev,
1672                                                  kalindi_golden_spm_registers,
1673                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1674                 break;
1675         case CHIP_KAVERI:
1676                 radeon_program_register_sequence(rdev,
1677                                                  spectre_mgcg_cgcg_init,
1678                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1679                 radeon_program_register_sequence(rdev,
1680                                                  spectre_golden_registers,
1681                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1682                 radeon_program_register_sequence(rdev,
1683                                                  spectre_golden_common_registers,
1684                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1685                 radeon_program_register_sequence(rdev,
1686                                                  spectre_golden_spm_registers,
1687                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1688                 break;
1689         case CHIP_HAWAII:
1690                 radeon_program_register_sequence(rdev,
1691                                                  hawaii_mgcg_cgcg_init,
1692                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1693                 radeon_program_register_sequence(rdev,
1694                                                  hawaii_golden_registers,
1695                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1696                 radeon_program_register_sequence(rdev,
1697                                                  hawaii_golden_common_registers,
1698                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1699                 radeon_program_register_sequence(rdev,
1700                                                  hawaii_golden_spm_registers,
1701                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1702                 break;
1703         default:
1704                 break;
1705         }
1706         mutex_unlock(&rdev->grbm_idx_mutex);
1707 }
1708
1709 /**
1710  * cik_get_xclk - get the xclk
1711  *
1712  * @rdev: radeon_device pointer
1713  *
1714  * Returns the reference clock used by the gfx engine
1715  * (CIK).
1716  */
1717 u32 cik_get_xclk(struct radeon_device *rdev)
1718 {
1719         u32 reference_clock = rdev->clock.spll.reference_freq;
1720
1721         if (rdev->flags & RADEON_IS_IGP) {
1722                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1723                         return reference_clock / 2;
1724         } else {
1725                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1726                         return reference_clock / 4;
1727         }
1728         return reference_clock;
1729 }
1730
1731 /**
1732  * cik_mm_rdoorbell - read a doorbell dword
1733  *
1734  * @rdev: radeon_device pointer
1735  * @index: doorbell index
1736  *
1737  * Returns the value in the doorbell aperture at the
1738  * requested doorbell index (CIK).
1739  */
1740 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1741 {
1742         if (index < rdev->doorbell.num_doorbells) {
1743                 return readl(rdev->doorbell.ptr + index);
1744         } else {
1745                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1746                 return 0;
1747         }
1748 }
1749
1750 /**
1751  * cik_mm_wdoorbell - write a doorbell dword
1752  *
1753  * @rdev: radeon_device pointer
1754  * @index: doorbell index
1755  * @v: value to write
1756  *
1757  * Writes @v to the doorbell aperture at the
1758  * requested doorbell index (CIK).
1759  */
1760 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1761 {
1762         if (index < rdev->doorbell.num_doorbells) {
1763                 writel(v, rdev->doorbell.ptr + index);
1764         } else {
1765                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1766         }
1767 }
1768
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC IO debug register table for the legacy (non-header) MC
 * firmware path: {index, data} pairs written to MC_SEQ_IO_DEBUG_INDEX /
 * MC_SEQ_IO_DEBUG_DATA by ci_mc_load_microcode() before the MC ucode
 * itself is programmed.  Values are hardware-specific; do not edit.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1810
#define HAWAII_IO_MC_REGS_SIZE 22

/* Hawaii MC IO debug register table for the legacy (non-header) MC
 * firmware path: {index, data} pairs written to MC_SEQ_IO_DEBUG_INDEX /
 * MC_SEQ_IO_DEBUG_DATA by ci_mc_load_microcode() before the MC ucode
 * itself is programmed.  Values are hardware-specific; do not edit.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1838
1839
1840 /**
1841  * cik_srbm_select - select specific register instances
1842  *
1843  * @rdev: radeon_device pointer
1844  * @me: selected ME (micro engine)
1845  * @pipe: pipe
1846  * @queue: queue
1847  * @vmid: VMID
1848  *
1849  * Switches the currently active registers instances.  Some
1850  * registers are instanced per VMID, others are instanced per
1851  * me/pipe/queue combination.
1852  */
1853 static void cik_srbm_select(struct radeon_device *rdev,
1854                             u32 me, u32 pipe, u32 queue, u32 vmid)
1855 {
1856         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1857                              MEID(me & 0x3) |
1858                              VMID(vmid & 0xf) |
1859                              QUEUEID(queue & 0x7));
1860         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1861 }
1862
/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Supports both the new header-based firmware container
 * (rdev->new_fw) and the legacy raw-blob format, which uses the
 * per-asic bonaire/hawaii_io_mc_regs tables in this file.
 * Returns 0 on success, error on failure.
 */
int ci_mc_load_microcode(struct radeon_device *rdev)
{
	/* legacy ucode words are big-endian, new-format words little-endian */
	const __be32 *fw_data = NULL;
	const __le32 *new_fw_data = NULL;
	u32 running, tmp;
	u32 *io_mc_regs = NULL;
	const __le32 *new_io_mc_regs = NULL;
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		/* new format: sizes and array offsets come from the header */
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);

		/* io debug entries are {index, data} pairs of dwords */
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		/* legacy format: whole blob is ucode, io regs come from
		 * the static per-asic tables above
		 */
		ucode_size = rdev->mc_fw->size / 4;

		switch (rdev->family) {
		case CHIP_BONAIRE:
			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
			regs_size = BONAIRE_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAWAII:
			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
			regs_size = HAWAII_IO_MC_REGS_SIZE;
			break;
		default:
			return -EINVAL;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	/* only load if the MC sequencer is not already running */
	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}

		/* NOTE(review): device/memory-specific override for PCI
		 * device 0x6649 — presumably a board quirk; values come
		 * from the hw team, do not change
		 */
		tmp = RREG32(MC_SEQ_MISC0);
		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
		}

		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}
1968
1969 /**
1970  * cik_init_microcode - load ucode images from disk
1971  *
1972  * @rdev: radeon_device pointer
1973  *
1974  * Use the firmware interface to load the ucode images into
1975  * the driver (not loaded into hw).
1976  * Returns 0 on success, error on failure.
1977  */
1978 static int cik_init_microcode(struct radeon_device *rdev)
1979 {
1980         const char *chip_name;
1981         const char *new_chip_name;
1982         size_t pfp_req_size, me_req_size, ce_req_size,
1983                 mec_req_size, rlc_req_size, mc_req_size = 0,
1984                 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1985         char fw_name[30];
1986         int new_fw = 0;
1987         int err;
1988         int num_fw;
1989         bool new_smc = false;
1990
1991         DRM_DEBUG("\n");
1992
1993         switch (rdev->family) {
1994         case CHIP_BONAIRE:
1995                 chip_name = "BONAIRE";
1996                 if ((rdev->pdev->revision == 0x80) ||
1997                     (rdev->pdev->revision == 0x81) ||
1998                     (rdev->pdev->device == 0x665f))
1999                         new_smc = true;
2000                 new_chip_name = "bonaire";
2001                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2002                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2003                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2004                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2005                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2006                 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2007                 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2008                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2009                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2010                 num_fw = 8;
2011                 break;
2012         case CHIP_HAWAII:
2013                 chip_name = "HAWAII";
2014                 if (rdev->pdev->revision == 0x80)
2015                         new_smc = true;
2016                 new_chip_name = "hawaii";
2017                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2018                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2019                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2020                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2021                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2022                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2023                 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2024                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2025                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2026                 num_fw = 8;
2027                 break;
2028         case CHIP_KAVERI:
2029                 chip_name = "KAVERI";
2030                 new_chip_name = "kaveri";
2031                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2032                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2033                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2034                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2035                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2036                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2037                 num_fw = 7;
2038                 break;
2039         case CHIP_KABINI:
2040                 chip_name = "KABINI";
2041                 new_chip_name = "kabini";
2042                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2043                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2044                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2045                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2046                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2047                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2048                 num_fw = 6;
2049                 break;
2050         case CHIP_MULLINS:
2051                 chip_name = "MULLINS";
2052                 new_chip_name = "mullins";
2053                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2054                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2055                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2056                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2057                 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2058                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2059                 num_fw = 6;
2060                 break;
2061         default: BUG();
2062         }
2063
2064         DRM_INFO("Loading %s Microcode\n", new_chip_name);
2065
2066         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2067         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2068         if (err) {
2069                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2070                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2071                 if (err)
2072                         goto out;
2073                 if (rdev->pfp_fw->size != pfp_req_size) {
2074                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2075                                rdev->pfp_fw->size, fw_name);
2076                         err = -EINVAL;
2077                         goto out;
2078                 }
2079         } else {
2080                 err = radeon_ucode_validate(rdev->pfp_fw);
2081                 if (err) {
2082                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2083                                fw_name);
2084                         goto out;
2085                 } else {
2086                         new_fw++;
2087                 }
2088         }
2089
2090         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2091         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2092         if (err) {
2093                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2094                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2095                 if (err)
2096                         goto out;
2097                 if (rdev->me_fw->size != me_req_size) {
2098                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2099                                rdev->me_fw->size, fw_name);
2100                         err = -EINVAL;
2101                 }
2102         } else {
2103                 err = radeon_ucode_validate(rdev->me_fw);
2104                 if (err) {
2105                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2106                                fw_name);
2107                         goto out;
2108                 } else {
2109                         new_fw++;
2110                 }
2111         }
2112
2113         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2114         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2115         if (err) {
2116                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2117                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2118                 if (err)
2119                         goto out;
2120                 if (rdev->ce_fw->size != ce_req_size) {
2121                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2122                                rdev->ce_fw->size, fw_name);
2123                         err = -EINVAL;
2124                 }
2125         } else {
2126                 err = radeon_ucode_validate(rdev->ce_fw);
2127                 if (err) {
2128                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2129                                fw_name);
2130                         goto out;
2131                 } else {
2132                         new_fw++;
2133                 }
2134         }
2135
2136         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2137         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2138         if (err) {
2139                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2140                 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2141                 if (err)
2142                         goto out;
2143                 if (rdev->mec_fw->size != mec_req_size) {
2144                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2145                                rdev->mec_fw->size, fw_name);
2146                         err = -EINVAL;
2147                 }
2148         } else {
2149                 err = radeon_ucode_validate(rdev->mec_fw);
2150                 if (err) {
2151                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2152                                fw_name);
2153                         goto out;
2154                 } else {
2155                         new_fw++;
2156                 }
2157         }
2158
2159         if (rdev->family == CHIP_KAVERI) {
2160                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2161                 err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2162                 if (err) {
2163                         goto out;
2164                 } else {
2165                         err = radeon_ucode_validate(rdev->mec2_fw);
2166                         if (err) {
2167                                 goto out;
2168                         } else {
2169                                 new_fw++;
2170                         }
2171                 }
2172         }
2173
2174         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2175         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2176         if (err) {
2177                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2178                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2179                 if (err)
2180                         goto out;
2181                 if (rdev->rlc_fw->size != rlc_req_size) {
2182                         pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2183                                rdev->rlc_fw->size, fw_name);
2184                         err = -EINVAL;
2185                 }
2186         } else {
2187                 err = radeon_ucode_validate(rdev->rlc_fw);
2188                 if (err) {
2189                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2190                                fw_name);
2191                         goto out;
2192                 } else {
2193                         new_fw++;
2194                 }
2195         }
2196
2197         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2198         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2199         if (err) {
2200                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2201                 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2202                 if (err)
2203                         goto out;
2204                 if (rdev->sdma_fw->size != sdma_req_size) {
2205                         pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2206                                rdev->sdma_fw->size, fw_name);
2207                         err = -EINVAL;
2208                 }
2209         } else {
2210                 err = radeon_ucode_validate(rdev->sdma_fw);
2211                 if (err) {
2212                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2213                                fw_name);
2214                         goto out;
2215                 } else {
2216                         new_fw++;
2217                 }
2218         }
2219
2220         /* No SMC, MC ucode on APUs */
2221         if (!(rdev->flags & RADEON_IS_IGP)) {
2222                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2223                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2224                 if (err) {
2225                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2226                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2227                         if (err) {
2228                                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2229                                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2230                                 if (err)
2231                                         goto out;
2232                         }
2233                         if ((rdev->mc_fw->size != mc_req_size) &&
2234                             (rdev->mc_fw->size != mc2_req_size)){
2235                                 pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2236                                        rdev->mc_fw->size, fw_name);
2237                                 err = -EINVAL;
2238                         }
2239                         DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2240                 } else {
2241                         err = radeon_ucode_validate(rdev->mc_fw);
2242                         if (err) {
2243                                 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2244                                        fw_name);
2245                                 goto out;
2246                         } else {
2247                                 new_fw++;
2248                         }
2249                 }
2250
2251                 if (new_smc)
2252                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2253                 else
2254                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2255                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2256                 if (err) {
2257                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2258                         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2259                         if (err) {
2260                                 pr_err("smc: error loading firmware \"%s\"\n",
2261                                        fw_name);
2262                                 release_firmware(rdev->smc_fw);
2263                                 rdev->smc_fw = NULL;
2264                                 err = 0;
2265                         } else if (rdev->smc_fw->size != smc_req_size) {
2266                                 pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2267                                        rdev->smc_fw->size, fw_name);
2268                                 err = -EINVAL;
2269                         }
2270                 } else {
2271                         err = radeon_ucode_validate(rdev->smc_fw);
2272                         if (err) {
2273                                 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2274                                        fw_name);
2275                                 goto out;
2276                         } else {
2277                                 new_fw++;
2278                         }
2279                 }
2280         }
2281
2282         if (new_fw == 0) {
2283                 rdev->new_fw = false;
2284         } else if (new_fw < num_fw) {
2285                 pr_err("ci_fw: mixing new and old firmware!\n");
2286                 err = -EINVAL;
2287         } else {
2288                 rdev->new_fw = true;
2289         }
2290
2291 out:
2292         if (err) {
2293                 if (err != -EINVAL)
2294                         pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2295                                fw_name);
2296                 release_firmware(rdev->pfp_fw);
2297                 rdev->pfp_fw = NULL;
2298                 release_firmware(rdev->me_fw);
2299                 rdev->me_fw = NULL;
2300                 release_firmware(rdev->ce_fw);
2301                 rdev->ce_fw = NULL;
2302                 release_firmware(rdev->mec_fw);
2303                 rdev->mec_fw = NULL;
2304                 release_firmware(rdev->mec2_fw);
2305                 rdev->mec2_fw = NULL;
2306                 release_firmware(rdev->rlc_fw);
2307                 rdev->rlc_fw = NULL;
2308                 release_firmware(rdev->sdma_fw);
2309                 rdev->sdma_fw = NULL;
2310                 release_firmware(rdev->mc_fw);
2311                 rdev->mc_fw = NULL;
2312                 release_firmware(rdev->smc_fw);
2313                 rdev->smc_fw = NULL;
2314         }
2315         return err;
2316 }
2317
2318 /*
2319  * Core functions
2320  */
2321 /**
2322  * cik_tiling_mode_table_init - init the hw tiling table
2323  *
2324  * @rdev: radeon_device pointer
2325  *
2326  * Starting with SI, the tiling setup is done globally in a
2327  * set of 32 tiling modes.  Rather than selecting each set of
2328  * parameters per surface as on older asics, we just select
2329  * which index in the tiling table we want to use, and the
2330  * surface uses those parameters (CIK).
2331  */
2332 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2333 {
2334         u32 *tile = rdev->config.cik.tile_mode_array;
2335         u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2336         const u32 num_tile_mode_states =
2337                         ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2338         const u32 num_secondary_tile_mode_states =
2339                         ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2340         u32 reg_offset, split_equal_to_row_size;
2341         u32 num_pipe_configs;
2342         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2343                 rdev->config.cik.max_shader_engines;
2344
2345         switch (rdev->config.cik.mem_row_size_in_kb) {
2346         case 1:
2347                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2348                 break;
2349         case 2:
2350         default:
2351                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2352                 break;
2353         case 4:
2354                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2355                 break;
2356         }
2357
2358         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2359         if (num_pipe_configs > 8)
2360                 num_pipe_configs = 16;
2361
2362         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2363                 tile[reg_offset] = 0;
2364         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2365                 macrotile[reg_offset] = 0;
2366
2367         switch(num_pipe_configs) {
2368         case 16:
2369                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2370                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2371                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2372                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2373                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2374                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2375                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2377                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2378                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2379                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2380                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2381                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2382                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2383                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2384                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2385                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2386                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2387                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388                            TILE_SPLIT(split_equal_to_row_size));
2389                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2390                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2392                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2393                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2394                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2396                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2397                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2398                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2399                            TILE_SPLIT(split_equal_to_row_size));
2400                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2401                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2402                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2403                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2405                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2406                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2407                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2408                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2409                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2410                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2411                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2412                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2413                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2414                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2415                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2416                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2417                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2418                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2420                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2421                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2422                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2423                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2424                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2425                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2426                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2427                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2428                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2429                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2430                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2432                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2433                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2435                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2436                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2437                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2438                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2439                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2440                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2441                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2442                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2443                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2444                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2445                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2446                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2447
2448                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2450                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2451                            NUM_BANKS(ADDR_SURF_16_BANK));
2452                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2454                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2455                            NUM_BANKS(ADDR_SURF_16_BANK));
2456                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2458                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2459                            NUM_BANKS(ADDR_SURF_16_BANK));
2460                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2462                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2463                            NUM_BANKS(ADDR_SURF_16_BANK));
2464                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2466                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2467                            NUM_BANKS(ADDR_SURF_8_BANK));
2468                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2470                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2471                            NUM_BANKS(ADDR_SURF_4_BANK));
2472                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2474                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2475                            NUM_BANKS(ADDR_SURF_2_BANK));
2476                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2478                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2479                            NUM_BANKS(ADDR_SURF_16_BANK));
2480                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2482                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2483                            NUM_BANKS(ADDR_SURF_16_BANK));
2484                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487                             NUM_BANKS(ADDR_SURF_16_BANK));
2488                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2490                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2491                             NUM_BANKS(ADDR_SURF_8_BANK));
2492                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2494                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2495                             NUM_BANKS(ADDR_SURF_4_BANK));
2496                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2498                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2499                             NUM_BANKS(ADDR_SURF_2_BANK));
2500                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2501                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2502                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2503                             NUM_BANKS(ADDR_SURF_2_BANK));
2504
2505                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2506                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2507                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2508                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2509                 break;
2510
2511         case 8:
2512                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2513                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2514                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2515                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2516                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2517                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2518                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2519                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2520                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2521                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2522                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2523                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2524                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2526                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2527                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2528                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2529                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2530                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531                            TILE_SPLIT(split_equal_to_row_size));
2532                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2533                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2534                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2535                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2536                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2537                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2538                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2539                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2540                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2541                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2542                            TILE_SPLIT(split_equal_to_row_size));
2543                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2544                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2545                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2546                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2548                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2549                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2550                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2552                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2553                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2554                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2555                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2556                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2557                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2558                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2560                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2561                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2562                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2563                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2564                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2565                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2566                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2567                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2568                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2569                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2570                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2571                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2572                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2573                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2574                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2575                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2576                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2577                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2578                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2579                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2580                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2581                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2582                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2583                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2584                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2585                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2586                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2587                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2588                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2589                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2590
2591                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2592                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2593                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2594                                 NUM_BANKS(ADDR_SURF_16_BANK));
2595                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2596                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2597                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2598                                 NUM_BANKS(ADDR_SURF_16_BANK));
2599                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2600                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2601                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2602                                 NUM_BANKS(ADDR_SURF_16_BANK));
2603                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2604                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2605                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2606                                 NUM_BANKS(ADDR_SURF_16_BANK));
2607                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2609                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2610                                 NUM_BANKS(ADDR_SURF_8_BANK));
2611                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2613                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2614                                 NUM_BANKS(ADDR_SURF_4_BANK));
2615                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2617                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2618                                 NUM_BANKS(ADDR_SURF_2_BANK));
2619                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2621                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2622                                 NUM_BANKS(ADDR_SURF_16_BANK));
2623                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2625                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2626                                 NUM_BANKS(ADDR_SURF_16_BANK));
2627                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2629                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2630                                 NUM_BANKS(ADDR_SURF_16_BANK));
2631                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2633                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2634                                 NUM_BANKS(ADDR_SURF_16_BANK));
2635                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2637                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2638                                 NUM_BANKS(ADDR_SURF_8_BANK));
2639                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2641                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2642                                 NUM_BANKS(ADDR_SURF_4_BANK));
2643                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2645                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2646                                 NUM_BANKS(ADDR_SURF_2_BANK));
2647
2648                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2649                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2650                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2651                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2652                 break;
2653
2654         case 4:
2655                 if (num_rbs == 4) {
2656                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2657                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2658                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2659                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2660                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2661                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2662                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2663                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2664                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2665                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2666                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2667                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2668                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2669                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2670                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2671                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2672                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2673                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2674                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675                            TILE_SPLIT(split_equal_to_row_size));
2676                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2677                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2678                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2679                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2680                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2681                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2682                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2683                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2684                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2685                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2686                            TILE_SPLIT(split_equal_to_row_size));
2687                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2688                            PIPE_CONFIG(ADDR_SURF_P4_16x16));
2689                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2690                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2692                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2693                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2694                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2696                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2697                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2698                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2699                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2700                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2701                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2702                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2704                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2705                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2706                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2707                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2708                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2709                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2710                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2711                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2712                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2713                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2714                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2716                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2717                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2718                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2719                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2720                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2722                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2723                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2724                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2726                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2727                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2728                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2729                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2730                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2731                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2732                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2734
2735                 } else if (num_rbs < 4) {
2736                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2737                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2738                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2739                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2740                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2741                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2742                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2743                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2744                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2745                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2746                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2747                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2748                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2749                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2750                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2751                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2752                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2753                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2754                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2755                            TILE_SPLIT(split_equal_to_row_size));
2756                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2757                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2758                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2759                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2760                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2761                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2762                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2763                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2764                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2765                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2766                            TILE_SPLIT(split_equal_to_row_size));
2767                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2768                            PIPE_CONFIG(ADDR_SURF_P4_8x16));
2769                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2770                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2771                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2772                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2773                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2774                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2775                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2776                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2777                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2778                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2779                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2780                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2781                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2782                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2783                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2784                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2785                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2786                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2787                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2788                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2789                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2790                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2791                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2792                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2793                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2794                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2795                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2796                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2797                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2798                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2799                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2800                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2801                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2802                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2803                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2804                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2805                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2806                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2807                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2808                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2809                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2810                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2811                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2812                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2813                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2814                 }
2815
2816                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2817                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2818                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2819                                 NUM_BANKS(ADDR_SURF_16_BANK));
2820                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2821                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2822                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2823                                 NUM_BANKS(ADDR_SURF_16_BANK));
2824                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2825                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2826                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2827                                 NUM_BANKS(ADDR_SURF_16_BANK));
2828                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2830                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2831                                 NUM_BANKS(ADDR_SURF_16_BANK));
2832                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2833                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2834                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2835                                 NUM_BANKS(ADDR_SURF_16_BANK));
2836                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2838                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2839                                 NUM_BANKS(ADDR_SURF_8_BANK));
2840                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2842                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2843                                 NUM_BANKS(ADDR_SURF_4_BANK));
2844                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2845                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2846                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2847                                 NUM_BANKS(ADDR_SURF_16_BANK));
2848                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2849                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2850                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2851                                 NUM_BANKS(ADDR_SURF_16_BANK));
2852                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2854                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2855                                 NUM_BANKS(ADDR_SURF_16_BANK));
2856                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2857                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2858                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2859                                 NUM_BANKS(ADDR_SURF_16_BANK));
2860                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2861                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2862                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2863                                 NUM_BANKS(ADDR_SURF_16_BANK));
2864                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2865                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2866                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2867                                 NUM_BANKS(ADDR_SURF_8_BANK));
2868                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2869                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2870                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2871                                 NUM_BANKS(ADDR_SURF_4_BANK));
2872
2873                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2874                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2875                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2876                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2877                 break;
2878
2879         case 2:
2880                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2881                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2882                            PIPE_CONFIG(ADDR_SURF_P2) |
2883                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2884                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2885                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2886                            PIPE_CONFIG(ADDR_SURF_P2) |
2887                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2888                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2889                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2890                            PIPE_CONFIG(ADDR_SURF_P2) |
2891                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2892                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2893                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2894                            PIPE_CONFIG(ADDR_SURF_P2) |
2895                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2896                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2897                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2898                            PIPE_CONFIG(ADDR_SURF_P2) |
2899                            TILE_SPLIT(split_equal_to_row_size));
2900                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2901                            PIPE_CONFIG(ADDR_SURF_P2) |
2902                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2903                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2904                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2905                            PIPE_CONFIG(ADDR_SURF_P2) |
2906                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2907                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2908                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2909                            PIPE_CONFIG(ADDR_SURF_P2) |
2910                            TILE_SPLIT(split_equal_to_row_size));
2911                 tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2912                            PIPE_CONFIG(ADDR_SURF_P2);
2913                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2914                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2915                            PIPE_CONFIG(ADDR_SURF_P2));
2916                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2917                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2918                             PIPE_CONFIG(ADDR_SURF_P2) |
2919                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2920                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2921                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2922                             PIPE_CONFIG(ADDR_SURF_P2) |
2923                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2924                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2925                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2926                             PIPE_CONFIG(ADDR_SURF_P2) |
2927                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2928                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2929                             PIPE_CONFIG(ADDR_SURF_P2) |
2930                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2931                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2932                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2933                             PIPE_CONFIG(ADDR_SURF_P2) |
2934                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2935                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2936                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2937                             PIPE_CONFIG(ADDR_SURF_P2) |
2938                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2939                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2940                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2941                             PIPE_CONFIG(ADDR_SURF_P2) |
2942                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2943                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2944                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2945                             PIPE_CONFIG(ADDR_SURF_P2));
2946                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2947                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2948                             PIPE_CONFIG(ADDR_SURF_P2) |
2949                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2950                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2951                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2952                             PIPE_CONFIG(ADDR_SURF_P2) |
2953                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2954                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2955                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2956                             PIPE_CONFIG(ADDR_SURF_P2) |
2957                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2958
2959                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2960                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2961                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2962                                 NUM_BANKS(ADDR_SURF_16_BANK));
2963                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2964                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2965                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2966                                 NUM_BANKS(ADDR_SURF_16_BANK));
2967                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2968                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2969                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2970                                 NUM_BANKS(ADDR_SURF_16_BANK));
2971                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2972                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2973                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2974                                 NUM_BANKS(ADDR_SURF_16_BANK));
2975                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2976                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2977                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2978                                 NUM_BANKS(ADDR_SURF_16_BANK));
2979                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2980                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2981                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2982                                 NUM_BANKS(ADDR_SURF_16_BANK));
2983                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2984                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2985                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2986                                 NUM_BANKS(ADDR_SURF_8_BANK));
2987                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2988                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2989                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2990                                 NUM_BANKS(ADDR_SURF_16_BANK));
2991                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2992                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2993                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994                                 NUM_BANKS(ADDR_SURF_16_BANK));
2995                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2996                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2997                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2998                                 NUM_BANKS(ADDR_SURF_16_BANK));
2999                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3000                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3001                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3002                                 NUM_BANKS(ADDR_SURF_16_BANK));
3003                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3004                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3005                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3006                                 NUM_BANKS(ADDR_SURF_16_BANK));
3007                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3008                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3009                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3010                                 NUM_BANKS(ADDR_SURF_16_BANK));
3011                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3012                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3013                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3014                                 NUM_BANKS(ADDR_SURF_8_BANK));
3015
3016                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3017                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3018                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3019                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3020                 break;
3021
3022         default:
3023                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3024         }
3025 }
3026
3027 /**
3028  * cik_select_se_sh - select which SE, SH to address
3029  *
3030  * @rdev: radeon_device pointer
3031  * @se_num: shader engine to address
3032  * @sh_num: sh block to address
3033  *
3034  * Select which SE, SH combinations to address. Certain
3035  * registers are instanced per SE or SH.  0xffffffff means
3036  * broadcast to all SEs or SHs (CIK).
3037  */
3038 static void cik_select_se_sh(struct radeon_device *rdev,
3039                              u32 se_num, u32 sh_num)
3040 {
3041         u32 data = INSTANCE_BROADCAST_WRITES;
3042
3043         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3044                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3045         else if (se_num == 0xffffffff)
3046                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3047         else if (sh_num == 0xffffffff)
3048                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3049         else
3050                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3051         WREG32(GRBM_GFX_INDEX, data);
3052 }
3053
3054 /**
3055  * cik_create_bitmask - create a bitmask
3056  *
3057  * @bit_width: length of the mask
3058  *
3059  * create a variable length bit mask (CIK).
3060  * Returns the bitmask.
3061  */
3062 static u32 cik_create_bitmask(u32 bit_width)
3063 {
3064         u32 i, mask = 0;
3065
3066         for (i = 0; i < bit_width; i++) {
3067                 mask <<= 1;
3068                 mask |= 1;
3069         }
3070         return mask;
3071 }
3072
/**
 * cik_get_rb_disabled - computes the mask of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs for the currently
 * selected SE/SH (CIK).
 * Returns the disabled RB bitmask.
 */
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
                              u32 max_rb_num_per_se,
                              u32 sh_per_se)
{
	u32 data, mask;

	/* the BACKEND_DISABLE field is only honored when bit 0 is set */
	data = RREG32(CC_RB_BACKEND_DISABLE);
	if (data & 1)
		data &= BACKEND_DISABLE_MASK;
	else
		data = 0;
	/* OR in the user/driver-requested disable bits */
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

	/* keep only the bits for the RBs that belong to one SH */
	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);

	return data & mask;
}
3103
/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 *
 * Configures per-SE/SH RB registers (CIK).
 */
static void cik_setup_rb(struct radeon_device *rdev,
                         u32 se_num, u32 sh_per_se,
                         u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Walk every SE/SH and pack each SH's disabled-RB bits into one
	 * bitmask.  GRBM_GFX_INDEX selection is instanced state, so the
	 * select/read sequence is serialized with grbm_idx_mutex.
	 */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			/* Hawaii uses a wider per-SH bitmap field than the
			 * other CIK parts */
			if (rdev->family == CHIP_HAWAII)
				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
			else
				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* restore broadcast so subsequent writes hit all instances */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* invert: a set bit in enabled_rbs means that RB is usable */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	rdev->config.cik.backend_enable_mask = enabled_rbs;

	/* program PA_SC_RASTER_CONFIG per SE from the 2 enable bits
	 * belonging to each SH (enabled_rbs is consumed 2 bits at a time) */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 0:
				if (j == 0)
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
				else
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
				break;
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);
}
3176
3177 /**
3178  * cik_gpu_init - setup the 3D engine
3179  *
3180  * @rdev: radeon_device pointer
3181  *
3182  * Configures the 3D engine and tiling configuration
3183  * registers so that the 3D engine is usable.
3184  */
3185 static void cik_gpu_init(struct radeon_device *rdev)
3186 {
3187         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3188         u32 mc_shared_chmap, mc_arb_ramcfg;
3189         u32 hdp_host_path_cntl;
3190         u32 tmp;
3191         int i, j;
3192
3193         switch (rdev->family) {
3194         case CHIP_BONAIRE:
3195                 rdev->config.cik.max_shader_engines = 2;
3196                 rdev->config.cik.max_tile_pipes = 4;
3197                 rdev->config.cik.max_cu_per_sh = 7;
3198                 rdev->config.cik.max_sh_per_se = 1;
3199                 rdev->config.cik.max_backends_per_se = 2;
3200                 rdev->config.cik.max_texture_channel_caches = 4;
3201                 rdev->config.cik.max_gprs = 256;
3202                 rdev->config.cik.max_gs_threads = 32;
3203                 rdev->config.cik.max_hw_contexts = 8;
3204
3205                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3206                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3207                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3208                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3209                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3210                 break;
3211         case CHIP_HAWAII:
3212                 rdev->config.cik.max_shader_engines = 4;
3213                 rdev->config.cik.max_tile_pipes = 16;
3214                 rdev->config.cik.max_cu_per_sh = 11;
3215                 rdev->config.cik.max_sh_per_se = 1;
3216                 rdev->config.cik.max_backends_per_se = 4;
3217                 rdev->config.cik.max_texture_channel_caches = 16;
3218                 rdev->config.cik.max_gprs = 256;
3219                 rdev->config.cik.max_gs_threads = 32;
3220                 rdev->config.cik.max_hw_contexts = 8;
3221
3222                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3223                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3224                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3225                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3226                 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3227                 break;
3228         case CHIP_KAVERI:
3229                 rdev->config.cik.max_shader_engines = 1;
3230                 rdev->config.cik.max_tile_pipes = 4;
3231                 if ((rdev->pdev->device == 0x1304) ||
3232                     (rdev->pdev->device == 0x1305) ||
3233                     (rdev->pdev->device == 0x130C) ||
3234                     (rdev->pdev->device == 0x130F) ||
3235                     (rdev->pdev->device == 0x1310) ||
3236                     (rdev->pdev->device == 0x1311) ||
3237                     (rdev->pdev->device == 0x131C)) {
3238                         rdev->config.cik.max_cu_per_sh = 8;
3239                         rdev->config.cik.max_backends_per_se = 2;
3240                 } else if ((rdev->pdev->device == 0x1309) ||
3241                            (rdev->pdev->device == 0x130A) ||
3242                            (rdev->pdev->device == 0x130D) ||
3243                            (rdev->pdev->device == 0x1313) ||
3244                            (rdev->pdev->device == 0x131D)) {
3245                         rdev->config.cik.max_cu_per_sh = 6;
3246                         rdev->config.cik.max_backends_per_se = 2;
3247                 } else if ((rdev->pdev->device == 0x1306) ||
3248                            (rdev->pdev->device == 0x1307) ||
3249                            (rdev->pdev->device == 0x130B) ||
3250                            (rdev->pdev->device == 0x130E) ||
3251                            (rdev->pdev->device == 0x1315) ||
3252                            (rdev->pdev->device == 0x1318) ||
3253                            (rdev->pdev->device == 0x131B)) {
3254                         rdev->config.cik.max_cu_per_sh = 4;
3255                         rdev->config.cik.max_backends_per_se = 1;
3256                 } else {
3257                         rdev->config.cik.max_cu_per_sh = 3;
3258                         rdev->config.cik.max_backends_per_se = 1;
3259                 }
3260                 rdev->config.cik.max_sh_per_se = 1;
3261                 rdev->config.cik.max_texture_channel_caches = 4;
3262                 rdev->config.cik.max_gprs = 256;
3263                 rdev->config.cik.max_gs_threads = 16;
3264                 rdev->config.cik.max_hw_contexts = 8;
3265
3266                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3267                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3268                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3269                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3270                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3271                 break;
3272         case CHIP_KABINI:
3273         case CHIP_MULLINS:
3274         default:
3275                 rdev->config.cik.max_shader_engines = 1;
3276                 rdev->config.cik.max_tile_pipes = 2;
3277                 rdev->config.cik.max_cu_per_sh = 2;
3278                 rdev->config.cik.max_sh_per_se = 1;
3279                 rdev->config.cik.max_backends_per_se = 1;
3280                 rdev->config.cik.max_texture_channel_caches = 2;
3281                 rdev->config.cik.max_gprs = 256;
3282                 rdev->config.cik.max_gs_threads = 16;
3283                 rdev->config.cik.max_hw_contexts = 8;
3284
3285                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3286                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3287                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3288                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3289                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3290                 break;
3291         }
3292
3293         /* Initialize HDP */
3294         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3295                 WREG32((0x2c14 + j), 0x00000000);
3296                 WREG32((0x2c18 + j), 0x00000000);
3297                 WREG32((0x2c1c + j), 0x00000000);
3298                 WREG32((0x2c20 + j), 0x00000000);
3299                 WREG32((0x2c24 + j), 0x00000000);
3300         }
3301
3302         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3303         WREG32(SRBM_INT_CNTL, 0x1);
3304         WREG32(SRBM_INT_ACK, 0x1);
3305
3306         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3307
3308         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3309         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3310
3311         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3312         rdev->config.cik.mem_max_burst_length_bytes = 256;
3313         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3314         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3315         if (rdev->config.cik.mem_row_size_in_kb > 4)
3316                 rdev->config.cik.mem_row_size_in_kb = 4;
3317         /* XXX use MC settings? */
3318         rdev->config.cik.shader_engine_tile_size = 32;
3319         rdev->config.cik.num_gpus = 1;
3320         rdev->config.cik.multi_gpu_tile_size = 64;
3321
3322         /* fix up row size */
3323         gb_addr_config &= ~ROW_SIZE_MASK;
3324         switch (rdev->config.cik.mem_row_size_in_kb) {
3325         case 1:
3326         default:
3327                 gb_addr_config |= ROW_SIZE(0);
3328                 break;
3329         case 2:
3330                 gb_addr_config |= ROW_SIZE(1);
3331                 break;
3332         case 4:
3333                 gb_addr_config |= ROW_SIZE(2);
3334                 break;
3335         }
3336
3337         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3338          * not have bank info, so create a custom tiling dword.
3339          * bits 3:0   num_pipes
3340          * bits 7:4   num_banks
3341          * bits 11:8  group_size
3342          * bits 15:12 row_size
3343          */
3344         rdev->config.cik.tile_config = 0;
3345         switch (rdev->config.cik.num_tile_pipes) {
3346         case 1:
3347                 rdev->config.cik.tile_config |= (0 << 0);
3348                 break;
3349         case 2:
3350                 rdev->config.cik.tile_config |= (1 << 0);
3351                 break;
3352         case 4:
3353                 rdev->config.cik.tile_config |= (2 << 0);
3354                 break;
3355         case 8:
3356         default:
3357                 /* XXX what about 12? */
3358                 rdev->config.cik.tile_config |= (3 << 0);
3359                 break;
3360         }
3361         rdev->config.cik.tile_config |=
3362                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3363         rdev->config.cik.tile_config |=
3364                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3365         rdev->config.cik.tile_config |=
3366                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3367
3368         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3369         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3370         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3371         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3372         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3373         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3374         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3375         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3376
3377         cik_tiling_mode_table_init(rdev);
3378
3379         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3380                      rdev->config.cik.max_sh_per_se,
3381                      rdev->config.cik.max_backends_per_se);
3382
3383         rdev->config.cik.active_cus = 0;
3384         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3385                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3386                         rdev->config.cik.active_cus +=
3387                                 hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3388                 }
3389         }
3390
3391         /* set HW defaults for 3D engine */
3392         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3393
3394         mutex_lock(&rdev->grbm_idx_mutex);
3395         /*
3396          * making sure that the following register writes will be broadcasted
3397          * to all the shaders
3398          */
3399         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3400         WREG32(SX_DEBUG_1, 0x20);
3401
3402         WREG32(TA_CNTL_AUX, 0x00010000);
3403
3404         tmp = RREG32(SPI_CONFIG_CNTL);
3405         tmp |= 0x03000000;
3406         WREG32(SPI_CONFIG_CNTL, tmp);
3407
3408         WREG32(SQ_CONFIG, 1);
3409
3410         WREG32(DB_DEBUG, 0);
3411
3412         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3413         tmp |= 0x00000400;
3414         WREG32(DB_DEBUG2, tmp);
3415
3416         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3417         tmp |= 0x00020200;
3418         WREG32(DB_DEBUG3, tmp);
3419
3420         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3421         tmp |= 0x00018208;
3422         WREG32(CB_HW_CONTROL, tmp);
3423
3424         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3425
3426         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3427                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3428                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3429                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3430
3431         WREG32(VGT_NUM_INSTANCES, 1);
3432
3433         WREG32(CP_PERFMON_CNTL, 0);
3434
3435         WREG32(SQ_CONFIG, 0);
3436
3437         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3438                                           FORCE_EOV_MAX_REZ_CNT(255)));
3439
3440         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3441                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3442
3443         WREG32(VGT_GS_VERTEX_REUSE, 16);
3444         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3445
3446         tmp = RREG32(HDP_MISC_CNTL);
3447         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3448         WREG32(HDP_MISC_CNTL, tmp);
3449
3450         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3451         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3452
3453         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3454         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3455         mutex_unlock(&rdev->grbm_idx_mutex);
3456
3457         udelay(50);
3458 }
3459
3460 /*
3461  * GPU scratch registers helpers function.
3462  */
3463 /**
3464  * cik_scratch_init - setup driver info for CP scratch regs
3465  *
3466  * @rdev: radeon_device pointer
3467  *
3468  * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
3470  * is not used by default on newer asics (r6xx+).  On newer asics,
3471  * memory buffers are used for fences rather than scratch regs.
3472  */
3473 static void cik_scratch_init(struct radeon_device *rdev)
3474 {
3475         int i;
3476
3477         rdev->scratch.num_reg = 7;
3478         rdev->scratch.reg_base = SCRATCH_REG0;
3479         for (i = 0; i < rdev->scratch.num_reg; i++) {
3480                 rdev->scratch.free[i] = true;
3481                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3482         }
3483 }
3484
3485 /**
3486  * cik_ring_test - basic gfx ring test
3487  *
3488  * @rdev: radeon_device pointer
3489  * @ring: radeon_ring structure holding ring information
3490  *
3491  * Allocate a scratch register and write to it using the gfx ring (CIK).
3492  * Provides a basic gfx ring test to verify that the ring is working.
3493  * Used by cik_cp_gfx_resume();
3494  * Returns 0 on success, error on failure.
3495  */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* Seed the scratch register with a sentinel so we can tell
	 * whether the CP write below actually landed.
	 */
	WREG32(scratch, 0xCAFEDEAD);
	/* The test packet below is exactly 3 dwords. */
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* Ask the CP to write 0xDEADBEEF into the scratch register */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring, false);

	/* Poll (1us granularity) until the CP has executed the packet
	 * or rdev->usec_timeout expires.
	 */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}
3536
3537 /**
3538  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3539  *
3540  * @rdev: radeon_device pointer
3541  * @ridx: radeon ring index
3542  *
3543  * Emits an hdp flush on the cp.
3544  */
static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
				       int ridx)
{
	struct radeon_ring *ring = &rdev->ring[ridx];
	u32 ref_and_mask;

	/* Select the GPU_HDP_FLUSH_REQ/DONE bit that corresponds to the
	 * engine emitting the flush.  Note the deliberate switch layout:
	 * the compute rings and the default case share one branch, which
	 * then dispatches on the micro-engine (ring->me) and pipe.
	 */
	switch (ring->idx) {
	case CAYMAN_RING_TYPE_CP1_INDEX:
	case CAYMAN_RING_TYPE_CP2_INDEX:
	default:
		switch (ring->me) {
		case 0:
			ref_and_mask = CP2 << ring->pipe;
			break;
		case 1:
			ref_and_mask = CP6 << ring->pipe;
			break;
		default:
			/* no flush bit defined for other MEs; nothing to emit */
			return;
		}
		break;
	case RADEON_RING_TYPE_GFX_INDEX:
		ref_and_mask = CP0;
		break;
	}

	/* WAIT_REG_MEM in write/wait/write mode: request the HDP flush,
	 * then stall the PFP until GPU_HDP_FLUSH_DONE reports completion.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, 0x20); /* poll interval */
}
3581
3582 /**
3583  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3584  *
3585  * @rdev: radeon_device pointer
3586  * @fence: radeon fence object
3587  *
 * Emits a fence sequence number on the gfx ring and flushes
3589  * GPU caches.
3590  */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.  The dummy EOP writes
	 * seq - 1 with no interrupt (INT_SEL(0)); only the real EOP
	 * below writes the final seq and raises the interrupt.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc); /* fence addr, dword aligned */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	radeon_ring_write(ring, fence->seq - 1);
	radeon_ring_write(ring, 0);

	/* Then send the real EOP event down the pipe. */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* DATA_SEL(1): write 32-bit seq; INT_SEL(2): raise interrupt */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3622
3623 /**
3624  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3625  *
3626  * @rdev: radeon_device pointer
3627  * @fence: radeon fence object
3628  *
 * Emits a fence sequence number on the compute ring and flushes
3630  * GPU caches.
3631  */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int.  Compute rings use
	 * RELEASE_MEM rather than the gfx EVENT_WRITE_EOP packet; note
	 * its operand order differs (sel dword precedes the address).
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL(1): write 32-bit seq; INT_SEL(2): raise interrupt */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc); /* fence addr, dword aligned */
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3650
3651 /**
3652  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3653  *
3654  * @rdev: radeon_device pointer
3655  * @ring: radeon ring buffer object
3656  * @semaphore: radeon semaphore object
 * @emit_wait: Is this a semaphore wait?
3658  *
3659  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3660  * from running ahead of semaphore waits.
3661  */
3662 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3663                              struct radeon_ring *ring,
3664                              struct radeon_semaphore *semaphore,
3665                              bool emit_wait)
3666 {
3667         uint64_t addr = semaphore->gpu_addr;
3668         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3669
3670         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3671         radeon_ring_write(ring, lower_32_bits(addr));
3672         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3673
3674         if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3675                 /* Prevent the PFP from running ahead of the semaphore wait */
3676                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3677                 radeon_ring_write(ring, 0x0);
3678         }
3679
3680         return true;
3681 }
3682
3683 /**
3684  * cik_copy_cpdma - copy pages using the CP DMA engine
3685  *
3686  * @rdev: radeon_device pointer
3687  * @src_offset: src GPU address
3688  * @dst_offset: dst GPU address
3689  * @num_gpu_pages: number of GPU pages to xfer
3690  * @resv: reservation object to sync to
3691  *
3692  * Copy GPU paging using the CP DMA engine (CIK+).
3693  * Used by the radeon ttm implementation to move pages if
3694  * registered as the asic copy callback.
3695  */
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
				    uint64_t src_offset, uint64_t dst_offset,
				    unsigned num_gpu_pages,
				    struct reservation_object *resv)
{
	struct radeon_fence *fence;
	struct radeon_sync sync;
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;
	int i, num_loops;
	int r = 0;

	radeon_sync_create(&sync);

	/* One DMA_DATA packet can move at most 0x1fffff bytes, so split
	 * the copy into num_loops chunks.  Each chunk emits a 7-dword
	 * packet; the extra 18 dwords cover the sync and fence packets.
	 */
	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	/* Wait for fences in the reservation object before copying */
	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		control = 0;
		/* only the last chunk synchronizes with the CP */
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC;
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, &fence, ring->idx);
	if (r) {
		/* roll back the packets written above */
		radeon_ring_unlock_undo(rdev, ring);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_sync_free(rdev, &sync, fence);

	return fence;
}
3754
3755 /*
3756  * IB stuff
3757  */
3758 /**
3759  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3760  *
3761  * @rdev: radeon_device pointer
3762  * @ib: radeon indirect buffer object
3763  *
3764  * Emits a DE (drawing engine) or CE (constant engine) IB
3765  * on the gfx ring.  IBs are usually generated by userspace
3766  * acceleration drivers and submitted to the kernel for
3767  * scheduling on the ring.  This function schedules the IB
3768  * on the gfx ring for execution by the GPU.
3769  */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this SET_UCONFIG_REG write plus the
			 * 4-dword IB packet emitted at the end below.
			 */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for this WRITE_DATA plus the 4-dword IB
			 * packet; records the post-IB rptr via writeback.
			 */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB size in dwords plus the VM id in bits 24+ */
	control |= ib->length_dw | (vm_id << 24);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC)); /* dword aligned */
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3809
3810 /**
3811  * cik_ib_test - basic gfx ring IB test
3812  *
3813  * @rdev: radeon_device pointer
3814  * @ring: radeon_ring structure holding ring information
3815  *
3816  * Allocate an IB and execute it on the gfx ring (CIK).
3817  * Provides a basic gfx ring test to verify that IBs are working.
3818  * Returns 0 on success, error on failure.
3819  */
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* Seed with a sentinel; the IB below should overwrite it */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* Minimal 3-dword IB: write 0xDEADBEEF to the scratch register */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;
	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	/* Wait (bounded) for the IB's fence before checking the result */
	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
		RADEON_USEC_IB_TEST_TIMEOUT));
	if (r < 0) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return r;
	} else if (r == 0) {
		DRM_ERROR("radeon: fence wait timed out.\n");
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return -ETIMEDOUT;
	}
	/* fence signalled (r > 0): reset r and poll for the write to land */
	r = 0;
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
	return r;
}
3882
3883 /*
3884  * CP.
 * On CIK, gfx and compute now have independent command processors.
3886  *
3887  * GFX
3888  * Gfx consists of a single ring and can process both gfx jobs and
3889  * compute jobs.  The gfx CP consists of three microengines (ME):
3890  * PFP - Pre-Fetch Parser
3891  * ME - Micro Engine
3892  * CE - Constant Engine
3893  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3895  * used by the DE so that they can be loaded into cache in parallel
3896  * while the DE is processing state update packets.
3897  *
3898  * Compute
3899  * The compute CP consists of two microengines (ME):
3900  * MEC1 - Compute MicroEngine 1
3901  * MEC2 - Compute MicroEngine 2
3902  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3903  * The queues are exposed to userspace and are programmed directly
3904  * by the compute runtime.
3905  */
3906 /**
3907  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3908  *
3909  * @rdev: radeon_device pointer
3910  * @enable: enable or disable the MEs
3911  *
3912  * Halts or unhalts the gfx MEs.
3913  */
3914 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3915 {
3916         if (enable)
3917                 WREG32(CP_ME_CNTL, 0);
3918         else {
3919                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3920                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3921                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3922                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3923         }
3924         udelay(50);
3925 }
3926
3927 /**
3928  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3929  *
3930  * @rdev: radeon_device pointer
3931  *
3932  * Loads the gfx PFP, ME, and CE ucode.
3933  * Returns 0 for success, -EINVAL if the ucode is not available.
3934  */
3935 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3936 {
3937         int i;
3938
3939         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3940                 return -EINVAL;
3941
3942         cik_cp_gfx_enable(rdev, false);
3943
3944         if (rdev->new_fw) {
3945                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3946                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3947                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3948                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3949                 const struct gfx_firmware_header_v1_0 *me_hdr =
3950                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3951                 const __le32 *fw_data;
3952                 u32 fw_size;
3953
3954                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3955                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3956                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3957
3958                 /* PFP */
3959                 fw_data = (const __le32 *)
3960                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3961                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3962                 WREG32(CP_PFP_UCODE_ADDR, 0);
3963                 for (i = 0; i < fw_size; i++)
3964                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3965                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3966
3967                 /* CE */
3968                 fw_data = (const __le32 *)
3969                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3970                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3971                 WREG32(CP_CE_UCODE_ADDR, 0);
3972                 for (i = 0; i < fw_size; i++)
3973                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3974                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3975
3976                 /* ME */
3977                 fw_data = (const __be32 *)
3978                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3979                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3980                 WREG32(CP_ME_RAM_WADDR, 0);
3981                 for (i = 0; i < fw_size; i++)
3982                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3983                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3984                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3985         } else {
3986                 const __be32 *fw_data;
3987
3988                 /* PFP */
3989                 fw_data = (const __be32 *)rdev->pfp_fw->data;
3990                 WREG32(CP_PFP_UCODE_ADDR, 0);
3991                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3992                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3993                 WREG32(CP_PFP_UCODE_ADDR, 0);
3994
3995                 /* CE */
3996                 fw_data = (const __be32 *)rdev->ce_fw->data;
3997                 WREG32(CP_CE_UCODE_ADDR, 0);
3998                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3999                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4000                 WREG32(CP_CE_UCODE_ADDR, 0);
4001
4002                 /* ME */
4003                 fw_data = (const __be32 *)rdev->me_fw->data;
4004                 WREG32(CP_ME_RAM_WADDR, 0);
4005                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4006                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4007                 WREG32(CP_ME_RAM_WADDR, 0);
4008         }
4009
4010         return 0;
4011 }
4012
4013 /**
4014  * cik_cp_gfx_start - start the gfx ring
4015  *
4016  * @rdev: radeon_device pointer
4017  *
4018  * Enables the ring and loads the clear state context and other
4019  * packets required to init the ring.
4020  * Returns 0 for success, error for failure.
4021  */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* 17 dwords of fixed packets below plus the clear-state table */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream the generated CIK default register state */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}
4073
4074 /**
4075  * cik_cp_gfx_fini - stop the gfx ring
4076  *
4077  * @rdev: radeon_device pointer
4078  *
4079  * Stop the gfx ring and tear down the driver ring
4080  * info.
4081  */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	/* halt the MEs before tearing down the ring buffer */
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
4087
4088 /**
4089  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4090  *
4091  * @rdev: radeon_device pointer
4092  *
4093  * Program the location and size of the gfx ring buffer
4094  * and test it to make sure it's working.
4095  * Returns 0 for success, error for failure.
4096  */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers.
	 * RB_RPTR_WR_ENA is set temporarily so the rptr can be reset;
	 * it is cleared again when tmp is re-written below.
	 */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	/* ring base address, 256-byte aligned */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	/* gfx ring is back: restore the full VRAM window for copies */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4163
4164 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4165                      struct radeon_ring *ring)
4166 {
4167         u32 rptr;
4168
4169         if (rdev->wb.enabled)
4170                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4171         else
4172                 rptr = RREG32(CP_RB0_RPTR);
4173
4174         return rptr;
4175 }
4176
u32 cik_gfx_get_wptr(struct radeon_device *rdev,
		     struct radeon_ring *ring)
{
	/* gfx wptr always comes straight from the register */
	return RREG32(CP_RB0_WPTR);
}
4182
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* read back to post the write before returning */
	(void)RREG32(CP_RB0_WPTR);
}
4189
u32 cik_compute_get_rptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled) {
		/* fast path: CPU-visible writeback copy */
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	} else {
		/* CP_HQD_* registers are per-queue; select this ring's
		 * me/pipe/queue via SRBM (under srbm_mutex) before the
		 * read, then restore the default selection.
		 */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		rptr = RREG32(CP_HQD_PQ_RPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return rptr;
}
4207
/**
 * cik_compute_get_wptr - get the current write pointer for a compute ring
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Returns the wptr from the writeback buffer when writeback is enabled,
 * otherwise reads CP_HQD_PQ_WPTR for this queue via an SRBM select
 * under srbm_mutex.
 */
u32 cik_compute_get_wptr(struct radeon_device *rdev,
                         struct radeon_ring *ring)
{
        u32 wptr;

        if (rdev->wb.enabled) {
                /* XXX check if swapping is necessary on BE */
                wptr = rdev->wb.wb[ring->wptr_offs/4];
        } else {
                mutex_lock(&rdev->srbm_mutex);
                cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
                wptr = RREG32(CP_HQD_PQ_WPTR);
                cik_srbm_select(rdev, 0, 0, 0, 0);
                mutex_unlock(&rdev->srbm_mutex);
        }

        return wptr;
}
4226
/**
 * cik_compute_set_wptr - commit the write pointer for a compute ring
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Updates the wptr copy in the writeback buffer and rings the queue's
 * doorbell to notify the CP of the new write pointer.
 */
void cik_compute_set_wptr(struct radeon_device *rdev,
                          struct radeon_ring *ring)
{
        /* XXX check if swapping is necessary on BE */
        rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
        WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4234
/**
 * cik_compute_stop - quiesce a compute queue's HQD
 * @rdev: radeon_device pointer
 * @ring: the compute ring to stop
 *
 * Selects the ring's me/pipe/queue via SRBM, disables write pointer
 * polling, requests a dequeue if the HQD is active and busy-waits
 * (up to rdev->usec_timeout iterations) for it to go idle, then
 * clears the queue read/write pointers.  The caller must hold
 * rdev->srbm_mutex (see cik_cp_compute_enable()).
 */
static void cik_compute_stop(struct radeon_device *rdev,
                             struct radeon_ring *ring)
{
        u32 j, tmp;

        cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
        /* Disable wptr polling. */
        tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
        tmp &= ~WPTR_POLL_EN;
        WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
        /* Disable HQD. */
        if (RREG32(CP_HQD_ACTIVE) & 1) {
                WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
                for (j = 0; j < rdev->usec_timeout; j++) {
                        if (!(RREG32(CP_HQD_ACTIVE) & 1))
                                break;
                        udelay(1);
                }
                WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
                WREG32(CP_HQD_PQ_RPTR, 0);
                WREG32(CP_HQD_PQ_WPTR, 0);
        }
        /* restore the default SRBM selection */
        cik_srbm_select(rdev, 0, 0, 0, 0);
}
4259
4260 /**
4261  * cik_cp_compute_enable - enable/disable the compute CP MEs
4262  *
4263  * @rdev: radeon_device pointer
4264  * @enable: enable or disable the MEs
4265  *
4266  * Halts or unhalts the compute MEs.
4267  */
4268 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4269 {
4270         if (enable)
4271                 WREG32(CP_MEC_CNTL, 0);
4272         else {
4273                 /*
4274                  * To make hibernation reliable we need to clear compute ring
4275                  * configuration before halting the compute ring.
4276                  */
4277                 mutex_lock(&rdev->srbm_mutex);
4278                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4279                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4280                 mutex_unlock(&rdev->srbm_mutex);
4281
4282                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4283                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4284                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4285         }
4286         udelay(50);
4287 }
4288
4289 /**
4290  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4291  *
4292  * @rdev: radeon_device pointer
4293  *
4294  * Loads the compute MEC1&2 ucode.
4295  * Returns 0 for success, -EINVAL if the ucode is not available.
4296  */
4297 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4298 {
4299         int i;
4300
4301         if (!rdev->mec_fw)
4302                 return -EINVAL;
4303
4304         cik_cp_compute_enable(rdev, false);
4305
4306         if (rdev->new_fw) {
4307                 const struct gfx_firmware_header_v1_0 *mec_hdr =
4308                         (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4309                 const __le32 *fw_data;
4310                 u32 fw_size;
4311
4312                 radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4313
4314                 /* MEC1 */
4315                 fw_data = (const __le32 *)
4316                         (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4317                 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4318                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4319                 for (i = 0; i < fw_size; i++)
4320                         WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4321                 WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4322
4323                 /* MEC2 */
4324                 if (rdev->family == CHIP_KAVERI) {
4325                         const struct gfx_firmware_header_v1_0 *mec2_hdr =
4326                                 (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4327
4328                         fw_data = (const __le32 *)
4329                                 (rdev->mec2_fw->data +
4330                                  le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4331                         fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4332                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4333                         for (i = 0; i < fw_size; i++)
4334                                 WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4335                         WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4336                 }
4337         } else {
4338                 const __be32 *fw_data;
4339
4340                 /* MEC1 */
4341                 fw_data = (const __be32 *)rdev->mec_fw->data;
4342                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4343                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4344                         WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4345                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4346
4347                 if (rdev->family == CHIP_KAVERI) {
4348                         /* MEC2 */
4349                         fw_data = (const __be32 *)rdev->mec_fw->data;
4350                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4351                         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4352                                 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4353                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4354                 }
4355         }
4356
4357         return 0;
4358 }
4359
/**
 * cik_cp_compute_start - start the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Enable the compute queues.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
        /* the queues themselves are programmed in cik_cp_compute_resume();
         * this only un-halts the MECs (writes CP_MEC_CNTL = 0).
         */
        cik_cp_compute_enable(rdev, true);

        return 0;
}
4374
4375 /**
4376  * cik_cp_compute_fini - stop the compute queues
4377  *
4378  * @rdev: radeon_device pointer
4379  *
4380  * Stop the compute queues and tear down the driver queue
4381  * info.
4382  */
4383 static void cik_cp_compute_fini(struct radeon_device *rdev)
4384 {
4385         int i, idx, r;
4386
4387         cik_cp_compute_enable(rdev, false);
4388
4389         for (i = 0; i < 2; i++) {
4390                 if (i == 0)
4391                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4392                 else
4393                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4394
4395                 if (rdev->ring[idx].mqd_obj) {
4396                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4397                         if (unlikely(r != 0))
4398                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4399
4400                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4401                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4402
4403                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4404                         rdev->ring[idx].mqd_obj = NULL;
4405                 }
4406         }
4407 }
4408
4409 static void cik_mec_fini(struct radeon_device *rdev)
4410 {
4411         int r;
4412
4413         if (rdev->mec.hpd_eop_obj) {
4414                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4415                 if (unlikely(r != 0))
4416                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4417                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4418                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4419
4420                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4421                 rdev->mec.hpd_eop_obj = NULL;
4422         }
4423 }
4424
4425 #define MEC_HPD_SIZE 2048
4426
/**
 * cik_mec_init - set up the compute MEC bookkeeping and HPD EOP buffer
 *
 * @rdev: radeon_device pointer
 *
 * Records the number of MECs/pipes/queues the driver will use, then
 * allocates, pins (in GTT) and zeroes the HPD EOP buffer shared by
 * the pipes (MEC_HPD_SIZE * 2 bytes per pipe; consumed in
 * cik_cp_compute_resume()).  On any failure after allocation the
 * buffer is torn down again via cik_mec_fini().
 * Returns 0 for success, negative error code on failure.
 */
static int cik_mec_init(struct radeon_device *rdev)
{
        int r;
        u32 *hpd;

        /*
         * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
         * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
         * Nonetheless, we assign only 1 pipe because all other pipes will
         * be handled by KFD
         */
        rdev->mec.num_mec = 1;
        rdev->mec.num_pipe = 1;
        rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

        if (rdev->mec.hpd_eop_obj == NULL) {
                r = radeon_bo_create(rdev,
                                     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
                                     PAGE_SIZE, true,
                                     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
                                     &rdev->mec.hpd_eop_obj);
                if (r) {
                        dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
                        return r;
                }
        }

        r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
        if (unlikely(r != 0)) {
                cik_mec_fini(rdev);
                return r;
        }
        r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
                          &rdev->mec.hpd_eop_gpu_addr);
        if (r) {
                dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
                cik_mec_fini(rdev);
                return r;
        }
        r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
        if (r) {
                dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
                cik_mec_fini(rdev);
                return r;
        }

        /* clear memory.  Not sure if this is required or not */
        memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

        radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
        radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

        return 0;
}
4481
/*
 * Shadow copy of the per-queue CP HQD (hardware queue descriptor)
 * registers.  Embedded in struct bonaire_mqd, which lives in a
 * GPU-visible buffer pointed to by CP_MQD_BASE_ADDR (see
 * cik_cp_compute_resume()), so the field order/layout must not be
 * changed.
 */
struct hqd_registers
{
        u32 cp_mqd_base_addr;
        u32 cp_mqd_base_addr_hi;
        u32 cp_hqd_active;
        u32 cp_hqd_vmid;
        u32 cp_hqd_persistent_state;
        u32 cp_hqd_pipe_priority;
        u32 cp_hqd_queue_priority;
        u32 cp_hqd_quantum;
        u32 cp_hqd_pq_base;
        u32 cp_hqd_pq_base_hi;
        u32 cp_hqd_pq_rptr;
        u32 cp_hqd_pq_rptr_report_addr;
        u32 cp_hqd_pq_rptr_report_addr_hi;
        u32 cp_hqd_pq_wptr_poll_addr;
        u32 cp_hqd_pq_wptr_poll_addr_hi;
        u32 cp_hqd_pq_doorbell_control;
        u32 cp_hqd_pq_wptr;
        u32 cp_hqd_pq_control;
        u32 cp_hqd_ib_base_addr;
        u32 cp_hqd_ib_base_addr_hi;
        u32 cp_hqd_ib_rptr;
        u32 cp_hqd_ib_control;
        u32 cp_hqd_iq_timer;
        u32 cp_hqd_iq_rptr;
        u32 cp_hqd_dequeue_request;
        u32 cp_hqd_dma_offload;
        u32 cp_hqd_sema_cmd;
        u32 cp_hqd_msg_type;
        u32 cp_hqd_atomic0_preop_lo;
        u32 cp_hqd_atomic0_preop_hi;
        u32 cp_hqd_atomic1_preop_lo;
        u32 cp_hqd_atomic1_preop_hi;
        u32 cp_hqd_hq_scheduler0;
        u32 cp_hqd_hq_scheduler1;
        u32 cp_mqd_control;
};
4520
/*
 * Memory queue descriptor (MQD) for a Bonaire-class compute queue.
 * One MQD per compute ring is allocated in GTT in
 * cik_cp_compute_resume() and its GPU address is handed to the CP via
 * CP_MQD_BASE_ADDR, so this layout is read by the hardware — do not
 * reorder fields.
 */
struct bonaire_mqd
{
        u32 header;
        u32 dispatch_initiator;
        u32 dimensions[3];
        u32 start_idx[3];
        u32 num_threads[3];
        u32 pipeline_stat_enable;
        u32 perf_counter_enable;
        u32 pgm[2];
        u32 tba[2];
        u32 tma[2];
        u32 pgm_rsrc[2];
        u32 vmid;
        u32 resource_limits;
        u32 static_thread_mgmt01[2];
        u32 tmp_ring_size;
        u32 static_thread_mgmt23[2];
        u32 restart[3];
        u32 thread_trace_enable;
        u32 reserved1;
        u32 user_data[16];
        u32 vgtcs_invoke_count[2];
        /* per-queue HQD register shadow, restored by the CP */
        struct hqd_registers queue_state;
        u32 dequeue_cntr;
        u32 interrupt_queue[64];
};
4548
/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the compute queues and test them to make sure they
 * are working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
        int r, i, j, idx;
        u32 tmp;
        bool use_doorbell = true;
        u64 hqd_gpu_addr;
        u64 mqd_gpu_addr;
        u64 eop_gpu_addr;
        u64 wb_gpu_addr;
        u32 *buf;
        struct bonaire_mqd *mqd;

        r = cik_cp_compute_start(rdev);
        if (r)
                return r;

        /* fix up chicken bits (bit 23 of CP_CPF_DEBUG; exact meaning
         * not documented here — presumably a hardware workaround)
         */
        tmp = RREG32(CP_CPF_DEBUG);
        tmp |= (1 << 23);
        WREG32(CP_CPF_DEBUG, tmp);

        /* init the pipes */
        mutex_lock(&rdev->srbm_mutex);

        /* each pipe gets its own MEC_HPD_SIZE * 2 slice of the EOP
         * buffer allocated in cik_mec_init()
         */
        for (i = 0; i < rdev->mec.num_pipe; ++i) {
                cik_srbm_select(rdev, 0, i, 0, 0);

                eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
                /* write the EOP addr */
                WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
                WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

                /* set the VMID assigned */
                WREG32(CP_HPD_EOP_VMID, 0);

                /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
                tmp = RREG32(CP_HPD_EOP_CONTROL);
                tmp &= ~EOP_SIZE_MASK;
                tmp |= order_base_2(MEC_HPD_SIZE / 8);
                WREG32(CP_HPD_EOP_CONTROL, tmp);

        }
        mutex_unlock(&rdev->srbm_mutex);

        /* init the queues.  Just two for now. */
        for (i = 0; i < 2; i++) {
                if (i == 0)
                        idx = CAYMAN_RING_TYPE_CP1_INDEX;
                else
                        idx = CAYMAN_RING_TYPE_CP2_INDEX;

                /* lazily allocate one MQD buffer object per ring */
                if (rdev->ring[idx].mqd_obj == NULL) {
                        r = radeon_bo_create(rdev,
                                             sizeof(struct bonaire_mqd),
                                             PAGE_SIZE, true,
                                             RADEON_GEM_DOMAIN_GTT, 0, NULL,
                                             NULL, &rdev->ring[idx].mqd_obj);
                        if (r) {
                                dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
                                return r;
                        }
                }

                r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
                if (unlikely(r != 0)) {
                        cik_cp_compute_fini(rdev);
                        return r;
                }
                r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
                                  &mqd_gpu_addr);
                if (r) {
                        dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
                        cik_cp_compute_fini(rdev);
                        return r;
                }
                r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
                if (r) {
                        dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
                        cik_cp_compute_fini(rdev);
                        return r;
                }

                /* init the mqd struct */
                memset(buf, 0, sizeof(struct bonaire_mqd));

                mqd = (struct bonaire_mqd *)buf;
                mqd->header = 0xC0310800;
                mqd->static_thread_mgmt01[0] = 0xffffffff;
                mqd->static_thread_mgmt01[1] = 0xffffffff;
                mqd->static_thread_mgmt23[0] = 0xffffffff;
                mqd->static_thread_mgmt23[1] = 0xffffffff;

                /* all HQD register writes below target this ring's queue */
                mutex_lock(&rdev->srbm_mutex);
                cik_srbm_select(rdev, rdev->ring[idx].me,
                                rdev->ring[idx].pipe,
                                rdev->ring[idx].queue, 0);

                /* disable wptr polling */
                tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
                tmp &= ~WPTR_POLL_EN;
                WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

                /* enable doorbell? */
                mqd->queue_state.cp_hqd_pq_doorbell_control =
                        RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
                if (use_doorbell)
                        mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
                else
                        mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
                WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
                       mqd->queue_state.cp_hqd_pq_doorbell_control);

                /* disable the queue if it's active */
                mqd->queue_state.cp_hqd_dequeue_request = 0;
                mqd->queue_state.cp_hqd_pq_rptr = 0;
                mqd->queue_state.cp_hqd_pq_wptr= 0;
                if (RREG32(CP_HQD_ACTIVE) & 1) {
                        WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
                        for (j = 0; j < rdev->usec_timeout; j++) {
                                if (!(RREG32(CP_HQD_ACTIVE) & 1))
                                        break;
                                udelay(1);
                        }
                        WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
                        WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
                        WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
                }

                /* set the pointer to the MQD */
                mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
                mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
                WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
                WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
                /* set MQD vmid to 0 */
                mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
                mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
                WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

                /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
                hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
                mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
                mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
                WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
                WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

                /* set up the HQD, this is similar to CP_RB0_CNTL */
                mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
                mqd->queue_state.cp_hqd_pq_control &=
                        ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

                mqd->queue_state.cp_hqd_pq_control |=
                        order_base_2(rdev->ring[idx].ring_size / 8);
                mqd->queue_state.cp_hqd_pq_control |=
                        (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
                mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
                mqd->queue_state.cp_hqd_pq_control &=
                        ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
                mqd->queue_state.cp_hqd_pq_control |=
                        PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
                WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

                /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
                if (i == 0)
                        wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
                else
                        wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
                mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
                mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
                WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
                WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
                       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

                /* set the wb address whether it's enabled or not */
                if (i == 0)
                        wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
                else
                        wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
                mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
                mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
                        upper_32_bits(wb_gpu_addr) & 0xffff;
                WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
                       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
                WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
                       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

                /* enable the doorbell if requested */
                if (use_doorbell) {
                        mqd->queue_state.cp_hqd_pq_doorbell_control =
                                RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
                        mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
                        mqd->queue_state.cp_hqd_pq_doorbell_control |=
                                DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
                        mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
                        mqd->queue_state.cp_hqd_pq_doorbell_control &=
                                ~(DOORBELL_SOURCE | DOORBELL_HIT);

                } else {
                        mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
                }
                WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
                       mqd->queue_state.cp_hqd_pq_doorbell_control);

                /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
                rdev->ring[idx].wptr = 0;
                mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
                WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
                mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

                /* set the vmid for the queue */
                mqd->queue_state.cp_hqd_vmid = 0;
                WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

                /* activate the queue */
                mqd->queue_state.cp_hqd_active = 1;
                WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

                cik_srbm_select(rdev, 0, 0, 0, 0);
                mutex_unlock(&rdev->srbm_mutex);

                radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
                radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

                /* a failed ring test only marks this ring not ready;
                 * it is not treated as a fatal error here
                 */
                rdev->ring[idx].ready = true;
                r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
                if (r)
                        rdev->ring[idx].ready = false;
        }

        return 0;
}
4790
/**
 * cik_cp_enable - enable/disable the gfx and compute CPs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable both command processors
 */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
        cik_cp_gfx_enable(rdev, enable);
        cik_cp_compute_enable(rdev, enable);
}
4796
/**
 * cik_cp_load_microcode - load the gfx and compute CP ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx ucode first, then the compute ucode.
 * Returns 0 for success, error code on the first failure.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r)
		return r;

	return cik_cp_compute_load_microcode(rdev);
}
4810
/**
 * cik_cp_fini - tear down the gfx and compute CPs
 *
 * @rdev: radeon_device pointer
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
        cik_cp_gfx_fini(rdev);
        cik_cp_compute_fini(rdev);
}
4816
/**
 * cik_cp_resume - load CP ucode and bring up the gfx and compute rings
 *
 * @rdev: radeon_device pointer
 *
 * GUI idle interrupts are masked while the CPs are brought up and
 * re-enabled on success.  Note that on an early error return the
 * interrupt is left disabled (existing behavior).
 * Returns 0 for success, error for failure.
 */
static int cik_cp_resume(struct radeon_device *rdev)
{
        int r;

        cik_enable_gui_idle_interrupt(rdev, false);

        r = cik_cp_load_microcode(rdev);
        if (r)
                return r;

        r = cik_cp_gfx_resume(rdev);
        if (r)
                return r;
        r = cik_cp_compute_resume(rdev);
        if (r)
                return r;

        cik_enable_gui_idle_interrupt(rdev, true);

        return 0;
}
4838
/**
 * cik_print_gpu_status_regs - dump GPU status registers
 *
 * @rdev: radeon_device pointer
 *
 * Dumps the GRBM/SRBM, SDMA and CP status registers to the kernel
 * log; used from the soft-reset path to help diagnose GPU hangs.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
        dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
                RREG32(GRBM_STATUS));
        dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
                RREG32(GRBM_STATUS2));
        dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
                RREG32(GRBM_STATUS_SE0));
        dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
                RREG32(GRBM_STATUS_SE1));
        dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
                RREG32(GRBM_STATUS_SE2));
        dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
                RREG32(GRBM_STATUS_SE3));
        dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
                RREG32(SRBM_STATUS));
        dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
                RREG32(SRBM_STATUS2));
        dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
                RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
        dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
                 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
        dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
        dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
                 RREG32(CP_STALLED_STAT1));
        dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
                 RREG32(CP_STALLED_STAT2));
        dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
                 RREG32(CP_STALLED_STAT3));
        dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
                 RREG32(CP_CPF_BUSY_STAT));
        dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
                 RREG32(CP_CPF_STALLED_STAT1));
        dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
        dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
        dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
                 RREG32(CP_CPC_STALLED_STAT1));
        dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4878
/**
 * cik_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return the relevant reset
 * mask to be used by cik_gpu_soft_reset().
 * Returns a mask of the blocks to be reset.
 */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
        u32 reset_mask = 0;
        u32 tmp;

        /* GRBM_STATUS */
        tmp = RREG32(GRBM_STATUS);
        if (tmp & (PA_BUSY | SC_BUSY |
                   BCI_BUSY | SX_BUSY |
                   TA_BUSY | VGT_BUSY |
                   DB_BUSY | CB_BUSY |
                   GDS_BUSY | SPI_BUSY |
                   IA_BUSY | IA_BUSY_NO_DMA))
                reset_mask |= RADEON_RESET_GFX;

        if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
                reset_mask |= RADEON_RESET_CP;

        /* GRBM_STATUS2 */
        tmp = RREG32(GRBM_STATUS2);
        if (tmp & RLC_BUSY)
                reset_mask |= RADEON_RESET_RLC;

        /* SDMA0_STATUS_REG */
        tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
        if (!(tmp & SDMA_IDLE))
                reset_mask |= RADEON_RESET_DMA;

        /* SDMA1_STATUS_REG */
        tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
        if (!(tmp & SDMA_IDLE))
                reset_mask |= RADEON_RESET_DMA1;

        /* SRBM_STATUS2 */
        tmp = RREG32(SRBM_STATUS2);
        if (tmp & SDMA_BUSY)
                reset_mask |= RADEON_RESET_DMA;

        if (tmp & SDMA1_BUSY)
                reset_mask |= RADEON_RESET_DMA1;

        /* SRBM_STATUS */
        tmp = RREG32(SRBM_STATUS);

        if (tmp & IH_BUSY)
                reset_mask |= RADEON_RESET_IH;

        if (tmp & SEM_BUSY)
                reset_mask |= RADEON_RESET_SEM;

        if (tmp & GRBM_RQ_PENDING)
                reset_mask |= RADEON_RESET_GRBM;

        if (tmp & VMC_BUSY)
                reset_mask |= RADEON_RESET_VMC;

        if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
                   MCC_BUSY | MCD_BUSY))
                reset_mask |= RADEON_RESET_MC;

        /* display hang detection is shared with the evergreen code */
        if (evergreen_is_display_hung(rdev))
                reset_mask |= RADEON_RESET_DISPLAY;

        /* Skip MC reset as it's mostly likely not hung, just busy */
        if (reset_mask & RADEON_RESET_MC) {
                DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
                reset_mask &= ~RADEON_RESET_MC;
        }

        return reset_mask;
}
4959
4960 /**
4961  * cik_gpu_soft_reset - soft reset GPU
4962  *
4963  * @rdev: radeon_device pointer
4964  * @reset_mask: mask of which blocks to reset
4965  *
4966  * Soft reset the blocks specified in @reset_mask.
4967  */
4968 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4969 {
4970         struct evergreen_mc_save save;
4971         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4972         u32 tmp;
4973
4974         if (reset_mask == 0)
4975                 return;
4976
4977         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4978
4979         cik_print_gpu_status_regs(rdev);
4980         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4981                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4982         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4983                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4984
4985         /* disable CG/PG */
4986         cik_fini_pg(rdev);
4987         cik_fini_cg(rdev);
4988
4989         /* stop the rlc */
4990         cik_rlc_stop(rdev);
4991
4992         /* Disable GFX parsing/prefetching */
4993         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4994
4995         /* Disable MEC parsing/prefetching */
4996         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4997
4998         if (reset_mask & RADEON_RESET_DMA) {
4999                 /* sdma0 */
5000                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5001                 tmp |= SDMA_HALT;
5002                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5003         }
5004         if (reset_mask & RADEON_RESET_DMA1) {
5005                 /* sdma1 */
5006                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5007                 tmp |= SDMA_HALT;
5008                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5009         }
5010
5011         evergreen_mc_stop(rdev, &save);
5012         if (evergreen_mc_wait_for_idle(rdev)) {
5013                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5014         }
5015
5016         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5017                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5018
5019         if (reset_mask & RADEON_RESET_CP) {
5020                 grbm_soft_reset |= SOFT_RESET_CP;
5021
5022                 srbm_soft_reset |= SOFT_RESET_GRBM;
5023         }
5024
5025         if (reset_mask & RADEON_RESET_DMA)
5026                 srbm_soft_reset |= SOFT_RESET_SDMA;
5027
5028         if (reset_mask & RADEON_RESET_DMA1)
5029                 srbm_soft_reset |= SOFT_RESET_SDMA1;
5030
5031         if (reset_mask & RADEON_RESET_DISPLAY)
5032                 srbm_soft_reset |= SOFT_RESET_DC;
5033
5034         if (reset_mask & RADEON_RESET_RLC)
5035                 grbm_soft_reset |= SOFT_RESET_RLC;
5036
5037         if (reset_mask & RADEON_RESET_SEM)
5038                 srbm_soft_reset |= SOFT_RESET_SEM;
5039
5040         if (reset_mask & RADEON_RESET_IH)
5041                 srbm_soft_reset |= SOFT_RESET_IH;
5042
5043         if (reset_mask & RADEON_RESET_GRBM)
5044                 srbm_soft_reset |= SOFT_RESET_GRBM;
5045
5046         if (reset_mask & RADEON_RESET_VMC)
5047                 srbm_soft_reset |= SOFT_RESET_VMC;
5048
5049         if (!(rdev->flags & RADEON_IS_IGP)) {
5050                 if (reset_mask & RADEON_RESET_MC)
5051                         srbm_soft_reset |= SOFT_RESET_MC;
5052         }
5053
5054         if (grbm_soft_reset) {
5055                 tmp = RREG32(GRBM_SOFT_RESET);
5056                 tmp |= grbm_soft_reset;
5057                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5058                 WREG32(GRBM_SOFT_RESET, tmp);
5059                 tmp = RREG32(GRBM_SOFT_RESET);
5060
5061                 udelay(50);
5062
5063                 tmp &= ~grbm_soft_reset;
5064                 WREG32(GRBM_SOFT_RESET, tmp);
5065                 tmp = RREG32(GRBM_SOFT_RESET);
5066         }
5067
5068         if (srbm_soft_reset) {
5069                 tmp = RREG32(SRBM_SOFT_RESET);
5070                 tmp |= srbm_soft_reset;
5071                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5072                 WREG32(SRBM_SOFT_RESET, tmp);
5073                 tmp = RREG32(SRBM_SOFT_RESET);
5074
5075                 udelay(50);
5076
5077                 tmp &= ~srbm_soft_reset;
5078                 WREG32(SRBM_SOFT_RESET, tmp);
5079                 tmp = RREG32(SRBM_SOFT_RESET);
5080         }
5081
5082         /* Wait a little for things to settle down */
5083         udelay(50);
5084
5085         evergreen_mc_resume(rdev, &save);
5086         udelay(50);
5087
5088         cik_print_gpu_status_regs(rdev);
5089 }
5090
/* GMCON register state saved across a pci config reset on IGP (KV) parts */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;	/* GMCON_RENG_EXECUTE */
	u32 gmcon_misc;		/* GMCON_MISC */
	u32 gmcon_misc3;	/* GMCON_MISC3 */
};
5096
/* Save the GMCON registers and quiesce the GMCON engine before an IGP
 * pci config reset; the saved values are restored by
 * kv_restore_regs_for_reset() afterwards.
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	/* keep the engine from triggering on power-up/register-update and
	 * disable stutter mode while the reset is in flight
	 */
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5108
/* Walk the GMCON PGFSM through its restore sequence after an IGP pci
 * config reset, then put back the registers saved by
 * kv_save_regs_for_reset().
 *
 * NOTE(review): the CONFIG/WRITE value pairs below are an opaque,
 * hardware-defined programming sequence (presumably one PGFSM bank per
 * 0xX00010ff CONFIG value, each padded with five dummy WRITEs) — do not
 * reorder or alter the constants.
 */
static void kv_restore_regs_for_reset(struct radeon_device *rdev,
				      struct kv_reset_save_regs *save)
{
	int i;

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x210000);
	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x21003);
	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x420000);
	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x120202);
	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);

	/* restore the state captured before the reset */
	WREG32(GMCON_MISC3, save->gmcon_misc3);
	WREG32(GMCON_MISC, save->gmcon_misc);
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
}
5181
/* Full asic reset via the pci config space ("hard" reset): halt every
 * engine, quiesce the MC, then trigger the reset through pci config and
 * wait for the asic to become accessible again.  On IGP parts the GMCON
 * state is saved/restored around the reset since it is otherwise lost.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset; CONFIG_MEMSIZE reads back as
	 * all 1s while the asic is still inaccessible
	 */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5244
5245 /**
5246  * cik_asic_reset - soft reset GPU
5247  *
5248  * @rdev: radeon_device pointer
5249  * @hard: force hard reset
5250  *
5251  * Look up which blocks are hung and attempt
5252  * to reset them.
5253  * Returns 0 for success.
5254  */
5255 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5256 {
5257         u32 reset_mask;
5258
5259         if (hard) {
5260                 cik_gpu_pci_config_reset(rdev);
5261                 return 0;
5262         }
5263
5264         reset_mask = cik_gpu_check_soft_reset(rdev);
5265
5266         if (reset_mask)
5267                 r600_set_bios_scratch_engine_hung(rdev, true);
5268
5269         /* try soft reset */
5270         cik_gpu_soft_reset(rdev, reset_mask);
5271
5272         reset_mask = cik_gpu_check_soft_reset(rdev);
5273
5274         /* try pci config reset */
5275         if (reset_mask && radeon_hard_reset)
5276                 cik_gpu_pci_config_reset(rdev);
5277
5278         reset_mask = cik_gpu_check_soft_reset(rdev);
5279
5280         if (!reset_mask)
5281                 r600_set_bios_scratch_engine_hung(rdev, false);
5282
5283         return 0;
5284 }
5285
5286 /**
5287  * cik_gfx_is_lockup - check if the 3D engine is locked up
5288  *
5289  * @rdev: radeon_device pointer
5290  * @ring: radeon_ring structure holding ring information
5291  *
5292  * Check if the 3D engine is locked up (CIK).
5293  * Returns true if the engine is locked, false if not.
5294  */
5295 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5296 {
5297         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5298
5299         if (!(reset_mask & (RADEON_RESET_GFX |
5300                             RADEON_RESET_COMPUTE |
5301                             RADEON_RESET_CP))) {
5302                 radeon_ring_lockup_update(rdev, ring);
5303                 return false;
5304         }
5305         return radeon_ring_test_lockup(rdev, ring);
5306 }
5307
5308 /* MC */
5309 /**
5310  * cik_mc_program - program the GPU memory controller
5311  *
5312  * @rdev: radeon_device pointer
5313  *
5314  * Set the location of vram, gart, and AGP in the GPU's
5315  * physical address space (CIK).
5316  */
5317 static void cik_mc_program(struct radeon_device *rdev)
5318 {
5319         struct evergreen_mc_save save;
5320         u32 tmp;
5321         int i, j;
5322
5323         /* Initialize HDP */
5324         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5325                 WREG32((0x2c14 + j), 0x00000000);
5326                 WREG32((0x2c18 + j), 0x00000000);
5327                 WREG32((0x2c1c + j), 0x00000000);
5328                 WREG32((0x2c20 + j), 0x00000000);
5329                 WREG32((0x2c24 + j), 0x00000000);
5330         }
5331         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5332
5333         evergreen_mc_stop(rdev, &save);
5334         if (radeon_mc_wait_for_idle(rdev)) {
5335                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5336         }
5337         /* Lockout access through VGA aperture*/
5338         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5339         /* Update configuration */
5340         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5341                rdev->mc.vram_start >> 12);
5342         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5343                rdev->mc.vram_end >> 12);
5344         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5345                rdev->vram_scratch.gpu_addr >> 12);
5346         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5347         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5348         WREG32(MC_VM_FB_LOCATION, tmp);
5349         /* XXX double check these! */
5350         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5351         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5352         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5353         WREG32(MC_VM_AGP_BASE, 0);
5354         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5355         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5356         if (radeon_mc_wait_for_idle(rdev)) {
5357                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5358         }
5359         evergreen_mc_resume(rdev, &save);
5360         /* we need to own VRAM, so turn off the VGA renderer here
5361          * to stop it overwriting our objects */
5362         rv515_vga_render_disable(rdev);
5363 }
5364
5365 /**
5366  * cik_mc_init - initialize the memory controller driver params
5367  *
5368  * @rdev: radeon_device pointer
5369  *
5370  * Look up the amount of vram, vram width, and decide how to place
5371  * vram and gart within the GPU's physical address space (CIK).
5372  * Returns 0 for success.
5373  */
5374 static int cik_mc_init(struct radeon_device *rdev)
5375 {
5376         u32 tmp;
5377         int chansize, numchan;
5378
5379         /* Get VRAM informations */
5380         rdev->mc.vram_is_ddr = true;
5381         tmp = RREG32(MC_ARB_RAMCFG);
5382         if (tmp & CHANSIZE_MASK) {
5383                 chansize = 64;
5384         } else {
5385                 chansize = 32;
5386         }
5387         tmp = RREG32(MC_SHARED_CHMAP);
5388         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5389         case 0:
5390         default:
5391                 numchan = 1;
5392                 break;
5393         case 1:
5394                 numchan = 2;
5395                 break;
5396         case 2:
5397                 numchan = 4;
5398                 break;
5399         case 3:
5400                 numchan = 8;
5401                 break;
5402         case 4:
5403                 numchan = 3;
5404                 break;
5405         case 5:
5406                 numchan = 6;
5407                 break;
5408         case 6:
5409                 numchan = 10;
5410                 break;
5411         case 7:
5412                 numchan = 12;
5413                 break;
5414         case 8:
5415                 numchan = 16;
5416                 break;
5417         }
5418         rdev->mc.vram_width = numchan * chansize;
5419         /* Could aper size report 0 ? */
5420         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5421         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5422         /* size in MB on si */
5423         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5424         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5425         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5426         si_vram_gtt_location(rdev, &rdev->mc);
5427         radeon_update_bandwidth_info(rdev);
5428
5429         return 0;
5430 }
5431
5432 /*
5433  * GART
5434  * VMID 0 is the physical GPU addresses as used by the kernel.
5435  * VMIDs 1-15 are used for userspace clients and are handled
5436  * by the radeon vm/hsa code.
5437  */
5438 /**
5439  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5440  *
5441  * @rdev: radeon_device pointer
5442  *
5443  * Flush the TLB for the VMID 0 page table (CIK).
5444  */
5445 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5446 {
5447         /* flush hdp cache */
5448         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5449
5450         /* bits 0-15 are the VM contexts0-15 */
5451         WREG32(VM_INVALIDATE_REQUEST, 0x1);
5452 }
5453
5454 /**
5455  * cik_pcie_gart_enable - gart enable
5456  *
5457  * @rdev: radeon_device pointer
5458  *
5459  * This sets up the TLBs, programs the page tables for VMID0,
5460  * sets up the hw for VMIDs 1-15 which are allocated on
5461  * demand, and sets up the global locations for the LDS, GDS,
5462  * and GPUVM for FSA64 clients (CIK).
5463  * Returns 0 for success, errors for failure.
5464  */
5465 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5466 {
5467         int r, i;
5468
5469         if (rdev->gart.robj == NULL) {
5470                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5471                 return -EINVAL;
5472         }
5473         r = radeon_gart_table_vram_pin(rdev);
5474         if (r)
5475                 return r;
5476         /* Setup TLB control */
5477         WREG32(MC_VM_MX_L1_TLB_CNTL,
5478                (0xA << 7) |
5479                ENABLE_L1_TLB |
5480                ENABLE_L1_FRAGMENT_PROCESSING |
5481                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5482                ENABLE_ADVANCED_DRIVER_MODEL |
5483                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5484         /* Setup L2 cache */
5485         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5486                ENABLE_L2_FRAGMENT_PROCESSING |
5487                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5488                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5489                EFFECTIVE_L2_QUEUE_SIZE(7) |
5490                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5491         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5492         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5493                BANK_SELECT(4) |
5494                L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5495         /* setup context0 */
5496         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5497         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5498         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5499         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5500                         (u32)(rdev->dummy_page.addr >> 12));
5501         WREG32(VM_CONTEXT0_CNTL2, 0);
5502         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5503                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5504
5505         WREG32(0x15D4, 0);
5506         WREG32(0x15D8, 0);
5507         WREG32(0x15DC, 0);
5508
5509         /* restore context1-15 */
5510         /* set vm size, must be a multiple of 4 */
5511         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5512         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5513         for (i = 1; i < 16; i++) {
5514                 if (i < 8)
5515                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5516                                rdev->vm_manager.saved_table_addr[i]);
5517                 else
5518                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5519                                rdev->vm_manager.saved_table_addr[i]);
5520         }
5521
5522         /* enable context1-15 */
5523         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5524                (u32)(rdev->dummy_page.addr >> 12));
5525         WREG32(VM_CONTEXT1_CNTL2, 4);
5526         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5527                                 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5528                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5529                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5530                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5531                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5532                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5533                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5534                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5535                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5536                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5537                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5538                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5539                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5540
5541         if (rdev->family == CHIP_KAVERI) {
5542                 u32 tmp = RREG32(CHUB_CONTROL);
5543                 tmp &= ~BYPASS_VM;
5544                 WREG32(CHUB_CONTROL, tmp);
5545         }
5546
5547         /* XXX SH_MEM regs */
5548         /* where to put LDS, scratch, GPUVM in FSA64 space */
5549         mutex_lock(&rdev->srbm_mutex);
5550         for (i = 0; i < 16; i++) {
5551                 cik_srbm_select(rdev, 0, 0, 0, i);
5552                 /* CP and shaders */
5553                 WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
5554                 WREG32(SH_MEM_APE1_BASE, 1);
5555                 WREG32(SH_MEM_APE1_LIMIT, 0);
5556                 WREG32(SH_MEM_BASES, 0);
5557                 /* SDMA GFX */
5558                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5559                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5560                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5561                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5562                 /* XXX SDMA RLC - todo */
5563         }
5564         cik_srbm_select(rdev, 0, 0, 0, 0);
5565         mutex_unlock(&rdev->srbm_mutex);
5566
5567         cik_pcie_gart_tlb_flush(rdev);
5568         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5569                  (unsigned)(rdev->mc.gtt_size >> 20),
5570                  (unsigned long long)rdev->gart.table_addr);
5571         rdev->gart.ready = true;
5572         return 0;
5573 }
5574
5575 /**
5576  * cik_pcie_gart_disable - gart disable
5577  *
5578  * @rdev: radeon_device pointer
5579  *
5580  * This disables all VM page table (CIK).
5581  */
5582 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5583 {
5584         unsigned i;
5585
5586         for (i = 1; i < 16; ++i) {
5587                 uint32_t reg;
5588                 if (i < 8)
5589                         reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5590                 else
5591                         reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5592                 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5593         }
5594
5595         /* Disable all tables */
5596         WREG32(VM_CONTEXT0_CNTL, 0);
5597         WREG32(VM_CONTEXT1_CNTL, 0);
5598         /* Setup TLB control */
5599         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5600                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5601         /* Setup L2 cache */
5602         WREG32(VM_L2_CNTL,
5603                ENABLE_L2_FRAGMENT_PROCESSING |
5604                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5605                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5606                EFFECTIVE_L2_QUEUE_SIZE(7) |
5607                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5608         WREG32(VM_L2_CNTL2, 0);
5609         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5610                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5611         radeon_gart_table_vram_unpin(rdev);
5612 }
5613
5614 /**
5615  * cik_pcie_gart_fini - vm fini callback
5616  *
5617  * @rdev: radeon_device pointer
5618  *
5619  * Tears down the driver GART/VM setup (CIK).
5620  */
5621 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5622 {
5623         cik_pcie_gart_disable(rdev);
5624         radeon_gart_table_vram_free(rdev);
5625         radeon_gart_fini(rdev);
5626 }
5627
5628 /* vm parser */
5629 /**
5630  * cik_ib_parse - vm ib_parse callback
5631  *
5632  * @rdev: radeon_device pointer
5633  * @ib: indirect buffer pointer
5634  *
5635  * CIK uses hw IB checking so this is a nop (CIK).
5636  */
5637 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5638 {
5639         return 0;
5640 }
5641
5642 /*
5643  * vm
5644  * VMID 0 is the physical GPU addresses as used by the kernel.
5645  * VMIDs 1-15 are used for userspace clients and are handled
5646  * by the radeon vm/hsa code.
5647  */
5648 /**
5649  * cik_vm_init - cik vm init callback
5650  *
5651  * @rdev: radeon_device pointer
5652  *
5653  * Inits cik specific vm parameters (number of VMs, base of vram for
5654  * VMIDs 1-15) (CIK).
5655  * Returns 0 for success.
5656  */
5657 int cik_vm_init(struct radeon_device *rdev)
5658 {
5659         /*
5660          * number of VMs
5661          * VMID 0 is reserved for System
5662          * radeon graphics/compute will use VMIDs 1-15
5663          */
5664         rdev->vm_manager.nvm = 16;
5665         /* base offset of vram pages */
5666         if (rdev->flags & RADEON_IS_IGP) {
5667                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5668                 tmp <<= 22;
5669                 rdev->vm_manager.vram_base_offset = tmp;
5670         } else
5671                 rdev->vm_manager.vram_base_offset = 0;
5672
5673         return 0;
5674 }
5675
5676 /**
5677  * cik_vm_fini - cik vm fini callback
5678  *
5679  * @rdev: radeon_device pointer
5680  *
5681  * Tear down any asic specific VM setup (CIK).
5682  */
5683 void cik_vm_fini(struct radeon_device *rdev)
5684 {
5685 }
5686
5687 /**
5688  * cik_vm_decode_fault - print human readable fault info
5689  *
5690  * @rdev: radeon_device pointer
5691  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5692  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5693  *
5694  * Print human readable fault information (CIK).
5695  */
5696 static void cik_vm_decode_fault(struct radeon_device *rdev,
5697                                 u32 status, u32 addr, u32 mc_client)
5698 {
5699         u32 mc_id;
5700         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5701         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5702         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5703                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5704
5705         if (rdev->family == CHIP_HAWAII)
5706                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5707         else
5708                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5709
5710         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5711                protections, vmid, addr,
5712                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5713                block, mc_client, mc_id);
5714 }
5715
5716 /**
5717  * cik_vm_flush - cik vm flush using the CP
5718  *
5719  * @rdev: radeon_device pointer
5720  *
5721  * Update the page table base and flush the VM TLB
5722  * using the CP (CIK).
5723  */
5724 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5725                   unsigned vm_id, uint64_t pd_addr)
5726 {
5727         int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5728
5729         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5730         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5731                                  WRITE_DATA_DST_SEL(0)));
5732         if (vm_id < 8) {
5733                 radeon_ring_write(ring,
5734                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5735         } else {
5736                 radeon_ring_write(ring,
5737                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5738         }
5739         radeon_ring_write(ring, 0);
5740         radeon_ring_write(ring, pd_addr >> 12);
5741
5742         /* update SH_MEM_* regs */
5743         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5744         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5745                                  WRITE_DATA_DST_SEL(0)));
5746         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5747         radeon_ring_write(ring, 0);
5748         radeon_ring_write(ring, VMID(vm_id));
5749
5750         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5751         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5752                                  WRITE_DATA_DST_SEL(0)));
5753         radeon_ring_write(ring, SH_MEM_BASES >> 2);
5754         radeon_ring_write(ring, 0);
5755
5756         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5757         radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
5758         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5759         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5760
5761         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5762         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5763                                  WRITE_DATA_DST_SEL(0)));
5764         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5765         radeon_ring_write(ring, 0);
5766         radeon_ring_write(ring, VMID(0));
5767
5768         /* HDP flush */
5769         cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5770
5771         /* bits 0-15 are the VM contexts0-15 */
5772         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5773         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5774                                  WRITE_DATA_DST_SEL(0)));
5775         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5776         radeon_ring_write(ring, 0);
5777         radeon_ring_write(ring, 1 << vm_id);
5778
5779         /* wait for the invalidate to complete */
5780         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5781         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5782                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
5783                                  WAIT_REG_MEM_ENGINE(0))); /* me */
5784         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5785         radeon_ring_write(ring, 0);
5786         radeon_ring_write(ring, 0); /* ref */
5787         radeon_ring_write(ring, 0); /* mask */
5788         radeon_ring_write(ring, 0x20); /* poll interval */
5789
5790         /* compute doesn't have PFP */
5791         if (usepfp) {
5792                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5793                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5794                 radeon_ring_write(ring, 0x0);
5795         }
5796 }
5797
5798 /*
5799  * RLC
5800  * The RLC is a multi-purpose microengine that handles a
5801  * variety of functions, the most important of which is
5802  * the interrupt controller.
5803  */
5804 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5805                                           bool enable)
5806 {
5807         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5808
5809         if (enable)
5810                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5811         else
5812                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5813         WREG32(CP_INT_CNTL_RING0, tmp);
5814 }
5815
5816 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5817 {
5818         u32 tmp;
5819
5820         tmp = RREG32(RLC_LB_CNTL);
5821         if (enable)
5822                 tmp |= LOAD_BALANCE_ENABLE;
5823         else
5824                 tmp &= ~LOAD_BALANCE_ENABLE;
5825         WREG32(RLC_LB_CNTL, tmp);
5826 }
5827
/* Poll until the RLC SERDES master units report idle.
 *
 * First polls the per-CU master busy register for every shader engine /
 * shader array pair (under grbm_idx_mutex, since cik_select_se_sh()
 * changes the GRBM broadcast index), then polls the non-CU masters
 * (SE / GC / TC0 / TC1).  Each poll gives up silently after
 * rdev->usec_timeout microseconds.
 */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
        u32 i, j, k;
        u32 mask;

        /* per-CU masters: walk every SE/SH combination */
        mutex_lock(&rdev->grbm_idx_mutex);
        for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
                for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
                        cik_select_se_sh(rdev, i, j);
                        for (k = 0; k < rdev->usec_timeout; k++) {
                                if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
                                        break;
                                udelay(1);
                        }
                }
        }
        /* restore broadcast to all SEs/SHs before releasing the index */
        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
        mutex_unlock(&rdev->grbm_idx_mutex);

        /* non-CU masters are global; no GRBM index needed */
        mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
        for (k = 0; k < rdev->usec_timeout; k++) {
                if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
                        break;
                udelay(1);
        }
}
5854
5855 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5856 {
5857         u32 tmp;
5858
5859         tmp = RREG32(RLC_CNTL);
5860         if (tmp != rlc)
5861                 WREG32(RLC_CNTL, rlc);
5862 }
5863
5864 static u32 cik_halt_rlc(struct radeon_device *rdev)
5865 {
5866         u32 data, orig;
5867
5868         orig = data = RREG32(RLC_CNTL);
5869
5870         if (data & RLC_ENABLE) {
5871                 u32 i;
5872
5873                 data &= ~RLC_ENABLE;
5874                 WREG32(RLC_CNTL, data);
5875
5876                 for (i = 0; i < rdev->usec_timeout; i++) {
5877                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5878                                 break;
5879                         udelay(1);
5880                 }
5881
5882                 cik_wait_for_rlc_serdes(rdev);
5883         }
5884
5885         return orig;
5886 }
5887
5888 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5889 {
5890         u32 tmp, i, mask;
5891
5892         tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5893         WREG32(RLC_GPR_REG2, tmp);
5894
5895         mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5896         for (i = 0; i < rdev->usec_timeout; i++) {
5897                 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5898                         break;
5899                 udelay(1);
5900         }
5901
5902         for (i = 0; i < rdev->usec_timeout; i++) {
5903                 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5904                         break;
5905                 udelay(1);
5906         }
5907 }
5908
5909 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5910 {
5911         u32 tmp;
5912
5913         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5914         WREG32(RLC_GPR_REG2, tmp);
5915 }
5916
5917 /**
5918  * cik_rlc_stop - stop the RLC ME
5919  *
5920  * @rdev: radeon_device pointer
5921  *
5922  * Halt the RLC ME (MicroEngine) (CIK).
5923  */
5924 static void cik_rlc_stop(struct radeon_device *rdev)
5925 {
5926         WREG32(RLC_CNTL, 0);
5927
5928         cik_enable_gui_idle_interrupt(rdev, false);
5929
5930         cik_wait_for_rlc_serdes(rdev);
5931 }
5932
5933 /**
5934  * cik_rlc_start - start the RLC ME
5935  *
5936  * @rdev: radeon_device pointer
5937  *
5938  * Unhalt the RLC ME (MicroEngine) (CIK).
5939  */
5940 static void cik_rlc_start(struct radeon_device *rdev)
5941 {
5942         WREG32(RLC_CNTL, RLC_ENABLE);
5943
5944         cik_enable_gui_idle_interrupt(rdev, true);
5945
5946         udelay(50);
5947 }
5948
5949 /**
5950  * cik_rlc_resume - setup the RLC hw
5951  *
5952  * @rdev: radeon_device pointer
5953  *
5954  * Initialize the RLC registers, load the ucode,
5955  * and start the RLC (CIK).
5956  * Returns 0 for success, -EINVAL if the ucode is not available.
5957  */
5958 static int cik_rlc_resume(struct radeon_device *rdev)
5959 {
5960         u32 i, size, tmp;
5961
5962         if (!rdev->rlc_fw)
5963                 return -EINVAL;
5964
5965         cik_rlc_stop(rdev);
5966
5967         /* disable CG */
5968         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5969         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5970
5971         si_rlc_reset(rdev);
5972
5973         cik_init_pg(rdev);
5974
5975         cik_init_cg(rdev);
5976
5977         WREG32(RLC_LB_CNTR_INIT, 0);
5978         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5979
5980         mutex_lock(&rdev->grbm_idx_mutex);
5981         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5982         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5983         WREG32(RLC_LB_PARAMS, 0x00600408);
5984         WREG32(RLC_LB_CNTL, 0x80000004);
5985         mutex_unlock(&rdev->grbm_idx_mutex);
5986
5987         WREG32(RLC_MC_CNTL, 0);
5988         WREG32(RLC_UCODE_CNTL, 0);
5989
5990         if (rdev->new_fw) {
5991                 const struct rlc_firmware_header_v1_0 *hdr =
5992                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5993                 const __le32 *fw_data = (const __le32 *)
5994                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5995
5996                 radeon_ucode_print_rlc_hdr(&hdr->header);
5997
5998                 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5999                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6000                 for (i = 0; i < size; i++)
6001                         WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6002                 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6003         } else {
6004                 const __be32 *fw_data;
6005
6006                 switch (rdev->family) {
6007                 case CHIP_BONAIRE:
6008                 case CHIP_HAWAII:
6009                 default:
6010                         size = BONAIRE_RLC_UCODE_SIZE;
6011                         break;
6012                 case CHIP_KAVERI:
6013                         size = KV_RLC_UCODE_SIZE;
6014                         break;
6015                 case CHIP_KABINI:
6016                         size = KB_RLC_UCODE_SIZE;
6017                         break;
6018                 case CHIP_MULLINS:
6019                         size = ML_RLC_UCODE_SIZE;
6020                         break;
6021                 }
6022
6023                 fw_data = (const __be32 *)rdev->rlc_fw->data;
6024                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6025                 for (i = 0; i < size; i++)
6026                         WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6027                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6028         }
6029
6030         /* XXX - find out what chips support lbpw */
6031         cik_enable_lbpw(rdev, false);
6032
6033         if (rdev->family == CHIP_BONAIRE)
6034                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
6035
6036         cik_rlc_start(rdev);
6037
6038         return 0;
6039 }
6040
/* Enable or disable GFX coarse-grain clockgating (CGCG) and
 * coarse-grain light sleep (CGLS).  The SERDES write sequence must be
 * done with the RLC halted; cik_halt_rlc()/cik_update_rlc() bracket it. */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
        u32 data, orig, tmp, tmp2;

        orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
                cik_enable_gui_idle_interrupt(rdev, true);

                tmp = cik_halt_rlc(rdev);

                /* broadcast the CGCG override to every SERDES master */
                mutex_lock(&rdev->grbm_idx_mutex);
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
                WREG32(RLC_SERDES_WR_CTRL, tmp2);
                mutex_unlock(&rdev->grbm_idx_mutex);

                /* restore the RLC state saved by cik_halt_rlc() */
                cik_update_rlc(rdev, tmp);

                data |= CGCG_EN | CGLS_EN;
        } else {
                cik_enable_gui_idle_interrupt(rdev, false);

                /* NOTE(review): four back-to-back reads with the results
                 * discarded — presumably dummy reads to flush/settle the
                 * CGTT clock logic before disabling gating; confirm
                 * against the hardware programming guide. */
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);

                data &= ~(CGCG_EN | CGLS_EN);
        }

        if (orig != data)
                WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6078
/* Enable or disable GFX medium-grain clockgating (MGCG) and the
 * related memory light-sleep / CGTS features, gated on the
 * corresponding rdev->cg_flags capability bits.  The SERDES override
 * writes are done with the RLC halted. */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
        u32 data, orig, tmp = 0;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
                /* CP memory light sleep, only if both MGLS and CP_LS
                 * are supported */
                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
                        if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
                                orig = data = RREG32(CP_MEM_SLP_CNTL);
                                data |= CP_MEM_LS_EN;
                                if (orig != data)
                                        WREG32(CP_MEM_SLP_CNTL, data);
                        }
                }

                /* set bit 0, clear bit 1 of the MGCG override */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data |= 0x00000001;
                data &= 0xfffffffd;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                tmp = cik_halt_rlc(rdev);

                /* broadcast the MGCG override to every SERDES master */
                mutex_lock(&rdev->grbm_idx_mutex);
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
                WREG32(RLC_SERDES_WR_CTRL, data);
                mutex_unlock(&rdev->grbm_idx_mutex);

                /* restore the RLC state saved by cik_halt_rlc() */
                cik_update_rlc(rdev, tmp);

                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
                        orig = data = RREG32(CGTS_SM_CTRL_REG);
                        data &= ~SM_MODE_MASK;
                        data |= SM_MODE(0x2);
                        data |= SM_MODE_ENABLE;
                        data &= ~CGTS_OVERRIDE;
                        if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
                            (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
                                data &= ~CGTS_LS_OVERRIDE;
                        data &= ~ON_MONITOR_ADD_MASK;
                        data |= ON_MONITOR_ADD_EN;
                        data |= ON_MONITOR_ADD(0x96);
                        if (orig != data)
                                WREG32(CGTS_SM_CTRL_REG, data);
                }
        } else {
                /* disable path: force both override bits on */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data |= 0x00000003;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                /* turn off RLC memory light sleep if it was on */
                data = RREG32(RLC_MEM_SLP_CNTL);
                if (data & RLC_MEM_LS_EN) {
                        data &= ~RLC_MEM_LS_EN;
                        WREG32(RLC_MEM_SLP_CNTL, data);
                }

                /* turn off CP memory light sleep if it was on */
                data = RREG32(CP_MEM_SLP_CNTL);
                if (data & CP_MEM_LS_EN) {
                        data &= ~CP_MEM_LS_EN;
                        WREG32(CP_MEM_SLP_CNTL, data);
                }

                orig = data = RREG32(CGTS_SM_CTRL_REG);
                data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
                if (orig != data)
                        WREG32(CGTS_SM_CTRL_REG, data);

                tmp = cik_halt_rlc(rdev);

                mutex_lock(&rdev->grbm_idx_mutex);
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
                WREG32(RLC_SERDES_WR_CTRL, data);
                mutex_unlock(&rdev->grbm_idx_mutex);

                cik_update_rlc(rdev, tmp);
        }
}
6162
/* Memory-controller / ATC / VM registers whose MC_LS_ENABLE and
 * MC_CG_ENABLE bits are toggled by cik_enable_mc_ls() and
 * cik_enable_mc_mgcg(). */
static const u32 mc_cg_registers[] =
{
        MC_HUB_MISC_HUB_CG,
        MC_HUB_MISC_SIP_CG,
        MC_HUB_MISC_VM_CG,
        MC_XPB_CLK_GAT,
        ATC_MISC_CG,
        MC_CITF_MISC_WR_CG,
        MC_CITF_MISC_RD_CG,
        MC_CITF_MISC_VM_CG,
        VM_L2_CG,
};
6175
6176 static void cik_enable_mc_ls(struct radeon_device *rdev,
6177                              bool enable)
6178 {
6179         int i;
6180         u32 orig, data;
6181
6182         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6183                 orig = data = RREG32(mc_cg_registers[i]);
6184                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6185                         data |= MC_LS_ENABLE;
6186                 else
6187                         data &= ~MC_LS_ENABLE;
6188                 if (data != orig)
6189                         WREG32(mc_cg_registers[i], data);
6190         }
6191 }
6192
6193 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6194                                bool enable)
6195 {
6196         int i;
6197         u32 orig, data;
6198
6199         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6200                 orig = data = RREG32(mc_cg_registers[i]);
6201                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6202                         data |= MC_CG_ENABLE;
6203                 else
6204                         data &= ~MC_CG_ENABLE;
6205                 if (data != orig)
6206                         WREG32(mc_cg_registers[i], data);
6207         }
6208 }
6209
6210 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6211                                  bool enable)
6212 {
6213         u32 orig, data;
6214
6215         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6216                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6217                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6218         } else {
6219                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6220                 data |= 0xff000000;
6221                 if (data != orig)
6222                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6223
6224                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6225                 data |= 0xff000000;
6226                 if (data != orig)
6227                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6228         }
6229 }
6230
6231 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6232                                  bool enable)
6233 {
6234         u32 orig, data;
6235
6236         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6237                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6238                 data |= 0x100;
6239                 if (orig != data)
6240                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6241
6242                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6243                 data |= 0x100;
6244                 if (orig != data)
6245                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6246         } else {
6247                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6248                 data &= ~0x100;
6249                 if (orig != data)
6250                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6251
6252                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6253                 data &= ~0x100;
6254                 if (orig != data)
6255                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6256         }
6257 }
6258
/* Enable or disable UVD medium-grain clockgating: program the
 * CGC memory-control bits and toggle DCM in UVD_CGC_CTRL. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
                                bool enable)
{
        u32 orig, data;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
                data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                /* NOTE(review): the value read above is discarded —
                 * the register is unconditionally set to 0xfff.  If the
                 * intent was to preserve the other bits this should be
                 * "data |= 0xfff"; confirm against the UVD docs before
                 * changing, as the read alone may be deliberate. */
                data = 0xfff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

                orig = data = RREG32(UVD_CGC_CTRL);
                data |= DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);
        } else {
                /* disable path clears the same bits instead */
                data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                data &= ~0xfff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

                orig = data = RREG32(UVD_CGC_CTRL);
                data &= ~DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);
        }
}
6284
6285 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6286                                bool enable)
6287 {
6288         u32 orig, data;
6289
6290         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6291
6292         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6293                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6294                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6295         else
6296                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6297                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6298
6299         if (orig != data)
6300                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6301 }
6302
6303 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6304                                 bool enable)
6305 {
6306         u32 orig, data;
6307
6308         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6309
6310         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6311                 data &= ~CLOCK_GATING_DIS;
6312         else
6313                 data |= CLOCK_GATING_DIS;
6314
6315         if (orig != data)
6316                 WREG32(HDP_HOST_PATH_CNTL, data);
6317 }
6318
6319 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6320                               bool enable)
6321 {
6322         u32 orig, data;
6323
6324         orig = data = RREG32(HDP_MEM_POWER_LS);
6325
6326         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6327                 data |= HDP_LS_ENABLE;
6328         else
6329                 data &= ~HDP_LS_ENABLE;
6330
6331         if (orig != data)
6332                 WREG32(HDP_MEM_POWER_LS, data);
6333 }
6334
6335 void cik_update_cg(struct radeon_device *rdev,
6336                    u32 block, bool enable)
6337 {
6338
6339         if (block & RADEON_CG_BLOCK_GFX) {
6340                 cik_enable_gui_idle_interrupt(rdev, false);
6341                 /* order matters! */
6342                 if (enable) {
6343                         cik_enable_mgcg(rdev, true);
6344                         cik_enable_cgcg(rdev, true);
6345                 } else {
6346                         cik_enable_cgcg(rdev, false);
6347                         cik_enable_mgcg(rdev, false);
6348                 }
6349                 cik_enable_gui_idle_interrupt(rdev, true);
6350         }
6351
6352         if (block & RADEON_CG_BLOCK_MC) {
6353                 if (!(rdev->flags & RADEON_IS_IGP)) {
6354                         cik_enable_mc_mgcg(rdev, enable);
6355                         cik_enable_mc_ls(rdev, enable);
6356                 }
6357         }
6358
6359         if (block & RADEON_CG_BLOCK_SDMA) {
6360                 cik_enable_sdma_mgcg(rdev, enable);
6361                 cik_enable_sdma_mgls(rdev, enable);
6362         }
6363
6364         if (block & RADEON_CG_BLOCK_BIF) {
6365                 cik_enable_bif_mgls(rdev, enable);
6366         }
6367
6368         if (block & RADEON_CG_BLOCK_UVD) {
6369                 if (rdev->has_uvd)
6370                         cik_enable_uvd_mgcg(rdev, enable);
6371         }
6372
6373         if (block & RADEON_CG_BLOCK_HDP) {
6374                 cik_enable_hdp_mgcg(rdev, enable);
6375                 cik_enable_hdp_ls(rdev, enable);
6376         }
6377
6378         if (block & RADEON_CG_BLOCK_VCE) {
6379                 vce_v2_0_enable_mgcg(rdev, enable);
6380         }
6381 }
6382
6383 static void cik_init_cg(struct radeon_device *rdev)
6384 {
6385
6386         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6387
6388         if (rdev->has_uvd)
6389                 si_init_uvd_internal_cg(rdev);
6390
6391         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6392                              RADEON_CG_BLOCK_SDMA |
6393                              RADEON_CG_BLOCK_BIF |
6394                              RADEON_CG_BLOCK_UVD |
6395                              RADEON_CG_BLOCK_HDP), true);
6396 }
6397
6398 static void cik_fini_cg(struct radeon_device *rdev)
6399 {
6400         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6401                              RADEON_CG_BLOCK_SDMA |
6402                              RADEON_CG_BLOCK_BIF |
6403                              RADEON_CG_BLOCK_UVD |
6404                              RADEON_CG_BLOCK_HDP), false);
6405
6406         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6407 }
6408
6409 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6410                                           bool enable)
6411 {
6412         u32 data, orig;
6413
6414         orig = data = RREG32(RLC_PG_CNTL);
6415         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6416                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6417         else
6418                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6419         if (orig != data)
6420                 WREG32(RLC_PG_CNTL, data);
6421 }
6422
6423 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6424                                           bool enable)
6425 {
6426         u32 data, orig;
6427
6428         orig = data = RREG32(RLC_PG_CNTL);
6429         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6430                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6431         else
6432                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6433         if (orig != data)
6434                 WREG32(RLC_PG_CNTL, data);
6435 }
6436
6437 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6438 {
6439         u32 data, orig;
6440
6441         orig = data = RREG32(RLC_PG_CNTL);
6442         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6443                 data &= ~DISABLE_CP_PG;
6444         else
6445                 data |= DISABLE_CP_PG;
6446         if (orig != data)
6447                 WREG32(RLC_PG_CNTL, data);
6448 }
6449
6450 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6451 {
6452         u32 data, orig;
6453
6454         orig = data = RREG32(RLC_PG_CNTL);
6455         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6456                 data &= ~DISABLE_GDS_PG;
6457         else
6458                 data |= DISABLE_GDS_PG;
6459         if (orig != data)
6460                 WREG32(RLC_PG_CNTL, data);
6461 }
6462
/* jump-table geometry for the legacy (headerless) firmware layout */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096

/* Copy the CP jump tables (CE, PFP, ME, MEC and, on Kaveri, MEC2)
 * out of the firmware images into the RLC cp_table buffer, back to
 * back.  New-style firmware carries the table offset/size in its
 * header; legacy firmware uses the fixed constants above. */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
        volatile u32 *dst_ptr;
        int me, i, max_me = 4;
        u32 bo_offset = 0;
        u32 table_offset, table_size;

        /* only Kaveri has a fifth microengine (MEC2) */
        if (rdev->family == CHIP_KAVERI)
                max_me = 5;

        if (rdev->rlc.cp_table_ptr == NULL)
                return;

        /* write the cp table buffer */
        dst_ptr = rdev->rlc.cp_table_ptr;
        for (me = 0; me < max_me; me++) {
                if (rdev->new_fw) {
                        const __le32 *fw_data;
                        const struct gfx_firmware_header_v1_0 *hdr;

                        /* pick the firmware image for this microengine;
                         * order: CE, PFP, ME, MEC, MEC2 */
                        if (me == 0) {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        } else if (me == 1) {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        } else if (me == 2) {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        } else if (me == 3) {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        } else {
                                /* me == 4: MEC2, only reached on Kaveri */
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        }

                        for (i = 0; i < table_size; i ++) {
                                dst_ptr[bo_offset + i] =
                                        cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
                        }
                        bo_offset += table_size;
                } else {
                        /* legacy firmware: big-endian words at fixed offsets */
                        const __be32 *fw_data;
                        table_size = CP_ME_TABLE_SIZE;

                        if (me == 0) {
                                fw_data = (const __be32 *)rdev->ce_fw->data;
                                table_offset = CP_ME_TABLE_OFFSET;
                        } else if (me == 1) {
                                fw_data = (const __be32 *)rdev->pfp_fw->data;
                                table_offset = CP_ME_TABLE_OFFSET;
                        } else if (me == 2) {
                                fw_data = (const __be32 *)rdev->me_fw->data;
                                table_offset = CP_ME_TABLE_OFFSET;
                        } else {
                                fw_data = (const __be32 *)rdev->mec_fw->data;
                                table_offset = CP_MEC_TABLE_OFFSET;
                        }

                        for (i = 0; i < table_size; i ++) {
                                dst_ptr[bo_offset + i] =
                                        cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
                        }
                        bo_offset += table_size;
                }
        }
}
6550
/* Enable or disable GFX powergating: toggles GFX_PG_ENABLE in
 * RLC_PG_CNTL and AUTO_PG_EN in RLC_AUTO_PG_CTRL together. */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
                                bool enable)
{
        u32 data, orig;

        if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
                orig = data = RREG32(RLC_PG_CNTL);
                data |= GFX_PG_ENABLE;
                if (orig != data)
                        WREG32(RLC_PG_CNTL, data);

                orig = data = RREG32(RLC_AUTO_PG_CTRL);
                data |= AUTO_PG_EN;
                if (orig != data)
                        WREG32(RLC_AUTO_PG_CTRL, data);
        } else {
                orig = data = RREG32(RLC_PG_CNTL);
                data &= ~GFX_PG_ENABLE;
                if (orig != data)
                        WREG32(RLC_PG_CNTL, data);

                orig = data = RREG32(RLC_AUTO_PG_CTRL);
                data &= ~AUTO_PG_EN;
                if (orig != data)
                        WREG32(RLC_AUTO_PG_CTRL, data);

                /* NOTE(review): the value read here is discarded —
                 * presumably the read itself is what matters (e.g. to
                 * wake/flush the DB block after disabling PG); confirm
                 * before removing. */
                data = RREG32(DB_RENDER_CONTROL);
        }
}
6580
6581 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6582 {
6583         u32 mask = 0, tmp, tmp1;
6584         int i;
6585
6586         mutex_lock(&rdev->grbm_idx_mutex);
6587         cik_select_se_sh(rdev, se, sh);
6588         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6589         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6590         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6591         mutex_unlock(&rdev->grbm_idx_mutex);
6592
6593         tmp &= 0xffff0000;
6594
6595         tmp |= tmp1;
6596         tmp >>= 16;
6597
6598         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6599                 mask <<= 1;
6600                 mask |= 1;
6601         }
6602
6603         return (~tmp) & mask;
6604 }
6605
6606 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6607 {
6608         u32 i, j, k, active_cu_number = 0;
6609         u32 mask, counter, cu_bitmap;
6610         u32 tmp = 0;
6611
6612         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6613                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6614                         mask = 1;
6615                         cu_bitmap = 0;
6616                         counter = 0;
6617                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6618                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6619                                         if (counter < 2)
6620                                                 cu_bitmap |= mask;
6621                                         counter ++;
6622                                 }
6623                                 mask <<= 1;
6624                         }
6625
6626                         active_cu_number += counter;
6627                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6628                 }
6629         }
6630
6631         WREG32(RLC_PG_AO_CU_MASK, tmp);
6632
6633         tmp = RREG32(RLC_MAX_PG_CU);
6634         tmp &= ~MAX_PU_CU_MASK;
6635         tmp |= MAX_PU_CU(active_cu_number);
6636         WREG32(RLC_MAX_PG_CU, tmp);
6637 }
6638
6639 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6640                                        bool enable)
6641 {
6642         u32 data, orig;
6643
6644         orig = data = RREG32(RLC_PG_CNTL);
6645         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6646                 data |= STATIC_PER_CU_PG_ENABLE;
6647         else
6648                 data &= ~STATIC_PER_CU_PG_ENABLE;
6649         if (orig != data)
6650                 WREG32(RLC_PG_CNTL, data);
6651 }
6652
6653 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6654                                         bool enable)
6655 {
6656         u32 data, orig;
6657
6658         orig = data = RREG32(RLC_PG_CNTL);
6659         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6660                 data |= DYN_PER_CU_PG_ENABLE;
6661         else
6662                 data &= ~DYN_PER_CU_PG_ENABLE;
6663         if (orig != data)
6664                 WREG32(RLC_PG_CNTL, data);
6665 }
6666
6667 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6668 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6669
/* Set up RLC state for gfx coarse-grain power gating: program the
 * clear state descriptor and register save/restore list into RLC
 * scratch, then configure the PG delay/threshold registers.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	/* Descriptor is three dwords at RLC_CLEAR_STATE_DESCRIPTOR_OFFSET:
	 * upper then lower half of the clear state GPU address, then its
	 * size.  Without cs_data, zero all three dwords instead.
	 */
	if (rdev->rlc.cs_data) {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	/* Upload the register save/restore list, if one was provided. */
	if (rdev->rlc.reg_list) {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	/* Set GFX_PG_SRC, skipping the write if the bit is already set. */
	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* Both base addresses are programmed in units of 256 bytes (>> 8). */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	/* replace the idle poll count field with 0x60 */
	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	/* replace the low byte of RLC_PG_DELAY_2 with 0x3 */
	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	/* program the GRBM_REG_SGIT field to 0x700 */
	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6718
/* Enable or disable all gfx power gating modes together: coarse grain
 * PG (cgpg) plus static and dynamic per-CU medium grain PG (mgpg).
 * Each helper checks the relevant rdev->pg_flags bit itself.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6725
6726 u32 cik_get_csb_size(struct radeon_device *rdev)
6727 {
6728         u32 count = 0;
6729         const struct cs_section_def *sect = NULL;
6730         const struct cs_extent_def *ext = NULL;
6731
6732         if (rdev->rlc.cs_data == NULL)
6733                 return 0;
6734
6735         /* begin clear state */
6736         count += 2;
6737         /* context control state */
6738         count += 3;
6739
6740         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6741                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6742                         if (sect->id == SECT_CONTEXT)
6743                                 count += 2 + ext->reg_count;
6744                         else
6745                                 return 0;
6746                 }
6747         }
6748         /* pa_sc_raster_config/pa_sc_raster_config1 */
6749         count += 4;
6750         /* end clear state */
6751         count += 2;
6752         /* clear state */
6753         count += 2;
6754
6755         return count;
6756 }
6757
/* Fill @buffer with the clear state indirect buffer: preamble,
 * context control, all SECT_CONTEXT register extents, the per-asic
 * pa_sc_raster_config pair, and the trailing clear state packet.
 * The dword layout must match the count from cik_get_csb_size().
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	/* context control */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* one SET_CONTEXT_REG packet per extent; bail out on any
	 * non-context section, mirroring cik_get_csb_size()
	 */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* pa_sc_raster_config/pa_sc_raster_config1: per-asic values */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	/* end clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
6822
/* Initialize and enable the power gating features advertised in
 * rdev->pg_flags; a no-op when no PG support is flagged.
 */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			/* gfx PG needs the cgpg setup (clear state +
			 * save/restore lists) plus CP and GDS gating
			 */
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
6837
/* Tear down power gating: reverse of cik_init_pg() — gfx PG is
 * disabled first, then CP and GDS gating when gfx PG was in use.
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
6848
6849 /*
6850  * Interrupts
6851  * Starting with r6xx, interrupts are handled via a ring buffer.
6852  * Ring buffers are areas of GPU accessible memory that the GPU
6853  * writes interrupt vectors into and the host reads vectors out of.
6854  * There is a rptr (read pointer) that determines where the
6855  * host is currently reading, and a wptr (write pointer)
6856  * which determines where the GPU has written.  When the
6857  * pointers are equal, the ring is idle.  When the GPU
6858  * writes vectors to the ring buffer, it increments the
6859  * wptr.  When there is an interrupt, the host then starts
6860  * fetching commands and processing them until the pointers are
6861  * equal again at which point it updates the rptr.
6862  */
6863
6864 /**
6865  * cik_enable_interrupts - Enable the interrupt ring buffer
6866  *
6867  * @rdev: radeon_device pointer
6868  *
6869  * Enable the interrupt ring buffer (CIK).
6870  */
6871 static void cik_enable_interrupts(struct radeon_device *rdev)
6872 {
6873         u32 ih_cntl = RREG32(IH_CNTL);
6874         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6875
6876         ih_cntl |= ENABLE_INTR;
6877         ih_rb_cntl |= IH_RB_ENABLE;
6878         WREG32(IH_CNTL, ih_cntl);
6879         WREG32(IH_RB_CNTL, ih_rb_cntl);
6880         rdev->ih.enabled = true;
6881 }
6882
6883 /**
6884  * cik_disable_interrupts - Disable the interrupt ring buffer
6885  *
6886  * @rdev: radeon_device pointer
6887  *
6888  * Disable the interrupt ring buffer (CIK).
6889  */
6890 static void cik_disable_interrupts(struct radeon_device *rdev)
6891 {
6892         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6893         u32 ih_cntl = RREG32(IH_CNTL);
6894
6895         ih_rb_cntl &= ~IH_RB_ENABLE;
6896         ih_cntl &= ~ENABLE_INTR;
6897         WREG32(IH_RB_CNTL, ih_rb_cntl);
6898         WREG32(IH_CNTL, ih_cntl);
6899         /* set rptr, wptr to 0 */
6900         WREG32(IH_RB_RPTR, 0);
6901         WREG32(IH_RB_WPTR, 0);
6902         rdev->ih.enabled = false;
6903         rdev->ih.rptr = 0;
6904 }
6905
6906 /**
6907  * cik_disable_interrupt_state - Disable all interrupt sources
6908  *
6909  * @rdev: radeon_device pointer
6910  *
6911  * Clear all interrupt enable bits used by the driver (CIK).
6912  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: keep only the context busy/empty enables */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma: clear the trap enable on both engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues: all pipes on ME1 and ME2 */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* SRBM */
	WREG32(SRBM_INT_CNTL, 0);
	/* vline/vblank, etc. — crtcs 2-5 only exist on some asics */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: keep the polarity bit, clear everything else */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
6982
6983 /**
6984  * cik_irq_init - init and enable the interrupt ring
6985  *
6986  * @rdev: radeon_device pointer
6987  *
6988  * Allocate a ring buffer for the interrupt controller,
6989  * enable the RLC, disable interrupts, enable the IH
6990  * ring buffer and enable it (CIK).
 * Called at device load and resume.
6992  * Returns 0 for success, errors for failure.
6993  */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc; free the IH ring again on failure */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	/* ring base is in units of 256 bytes; size field is log2(dwords) */
	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
7064
7065 /**
7066  * cik_irq_set - enable/disable interrupt sources
7067  *
7068  * @rdev: radeon_device pointer
7069  *
7070  * Enable interrupt sources on the GPU (vblanks, hpd,
7071  * etc.) (CIK).
7072  * Returns 0 for success, errors for failure.
7073  */
int cik_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl;
	u32 cp_m1p0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		cik_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		cik_disable_interrupt_state(rdev);
		return 0;
	}

	/* Read the current register state with the per-source enable bits
	 * masked off; the requested sources are OR'd back in below before
	 * everything is written out in one pass.
	 */
	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;

	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);

	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	/* compute rings: only ME1/pipe0 is handled here */
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
		}
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
		}
	}

	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}

	/* vblank: requested either by the vblank path or a pending pflip */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("cik_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("cik_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("cik_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("cik_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("cik_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("cik_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INTERRUPT_MASK;
	}
	/* hotplug: enable both the HPD and RX interrupts per pad */
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("cik_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("cik_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("cik_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("cik_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("cik_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("cik_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}

	/* write the accumulated enable state back to the hardware */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);

	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	/* pflip interrupts are always unmasked on present crtcs */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}

	WREG32(DC_HPD1_INT_CONTROL, hpd1);
	WREG32(DC_HPD2_INT_CONTROL, hpd2);
	WREG32(DC_HPD3_INT_CONTROL, hpd3);
	WREG32(DC_HPD4_INT_CONTROL, hpd4);
	WREG32(DC_HPD5_INT_CONTROL, hpd5);
	WREG32(DC_HPD6_INT_CONTROL, hpd6);

	/* posting read */
	RREG32(SRBM_STATUS);

	return 0;
}
7265
7266 /**
7267  * cik_irq_ack - ack interrupt sources
7268  *
7269  * @rdev: radeon_device pointer
7270  *
7271  * Ack interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).  Certain interrupt sources are sw
7273  * generated and do not require an explicit ack.
7274  */
7275 static inline void cik_irq_ack(struct radeon_device *rdev)
7276 {
7277         u32 tmp;
7278
7279         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7280         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7281         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7282         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7283         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7284         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7285         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7286
7287         rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7288                 EVERGREEN_CRTC0_REGISTER_OFFSET);
7289         rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7290                 EVERGREEN_CRTC1_REGISTER_OFFSET);
7291         if (rdev->num_crtc >= 4) {
7292                 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7293                         EVERGREEN_CRTC2_REGISTER_OFFSET);
7294                 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7295                         EVERGREEN_CRTC3_REGISTER_OFFSET);
7296         }
7297         if (rdev->num_crtc >= 6) {
7298                 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7299                         EVERGREEN_CRTC4_REGISTER_OFFSET);
7300                 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7301                         EVERGREEN_CRTC5_REGISTER_OFFSET);
7302         }
7303
7304         if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7305                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7306                        GRPH_PFLIP_INT_CLEAR);
7307         if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7308                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7309                        GRPH_PFLIP_INT_CLEAR);
7310         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7311                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7312         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7313                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7314         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7315                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7316         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7317                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7318
7319         if (rdev->num_crtc >= 4) {
7320                 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7321                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7322                                GRPH_PFLIP_INT_CLEAR);
7323                 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7324                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7325                                GRPH_PFLIP_INT_CLEAR);
7326                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7327                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7328                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7329                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7330                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7331                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7332                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7333                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7334         }
7335
7336         if (rdev->num_crtc >= 6) {
7337                 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7338                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7339                                GRPH_PFLIP_INT_CLEAR);
7340                 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7341                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7342                                GRPH_PFLIP_INT_CLEAR);
7343                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7344                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7345                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7346                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7347                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7348                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7349                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7350                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7351         }
7352
7353         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7354                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7355                 tmp |= DC_HPDx_INT_ACK;
7356                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7357         }
7358         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7359                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7360                 tmp |= DC_HPDx_INT_ACK;
7361                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7362         }
7363         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7364                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7365                 tmp |= DC_HPDx_INT_ACK;
7366                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7367         }
7368         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7369                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7370                 tmp |= DC_HPDx_INT_ACK;
7371                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7372         }
7373         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7374                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7375                 tmp |= DC_HPDx_INT_ACK;
7376                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7377         }
7378         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7379                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7380                 tmp |= DC_HPDx_INT_ACK;
7381                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7382         }
7383         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7384                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7385                 tmp |= DC_HPDx_RX_INT_ACK;
7386                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7387         }
7388         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7389                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7390                 tmp |= DC_HPDx_RX_INT_ACK;
7391                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7392         }
7393         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7394                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7395                 tmp |= DC_HPDx_RX_INT_ACK;
7396                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7397         }
7398         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7399                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7400                 tmp |= DC_HPDx_RX_INT_ACK;
7401                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7402         }
7403         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7404                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7405                 tmp |= DC_HPDx_RX_INT_ACK;
7406                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7407         }
7408         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7409                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7410                 tmp |= DC_HPDx_RX_INT_ACK;
7411                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7412         }
7413 }
7414
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).  Disables the master interrupt
 * path first, then acks anything still latched and clears the
 * per-source interrupt enable state.
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	cik_disable_interrupt_state(rdev);
}
7430
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7444
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).  Interrupts must be quiesced before the
 * ring memory is released.
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7459
7460 /**
7461  * cik_get_ih_wptr - get the IH ring buffer wptr
7462  *
7463  * @rdev: radeon_device pointer
7464  *
7465  * Get the IH ring buffer wptr from either the register
7466  * or the writeback memory buffer (CIK).  Also check for
7467  * ring buffer overflow and deal with it.
7468  * Used by cik_irq_process().
7469  * Returns the value of the wptr.
7470  */
7471 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7472 {
7473         u32 wptr, tmp;
7474
7475         if (rdev->wb.enabled)
7476                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7477         else
7478                 wptr = RREG32(IH_RB_WPTR);
7479
7480         if (wptr & RB_OVERFLOW) {
7481                 wptr &= ~RB_OVERFLOW;
7482                 /* When a ring buffer overflow happen start parsing interrupt
7483                  * from the last not overwritten vector (wptr + 16). Hopefully
7484                  * this should allow us to catchup.
7485                  */
7486                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7487                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7488                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7489                 tmp = RREG32(IH_RB_CNTL);
7490                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7491                 WREG32(IH_RB_CNTL, tmp);
7492         }
7493         return (wptr & rdev->ih.ptr_mask);
7494 }
7495
7496 /*        CIK IV Ring
7497  * Each IV ring entry is 128 bits:
7498  * [7:0]    - interrupt source id
7499  * [31:8]   - reserved
7500  * [59:32]  - interrupt source data
7501  * [63:60]  - reserved
7502  * [71:64]  - RINGID
7503  *            CP:
7504  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7505  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7506  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7507  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7508  *            PIPE_ID - ME0 0=3D
7509  *                    - ME1&2 compute dispatcher (4 pipes each)
7510  *            SDMA:
7511  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7512  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7513  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7514  * [79:72]  - VMID
7515  * [95:80]  - PASID
7516  * [127:96] - reserved
7517  */
7518 /**
7519  * cik_irq_process - interrupt handler
7520  *
7521  * @rdev: radeon_device pointer
7522  *
 * Interrupt handler (CIK).  Walk the IH ring,
7524  * ack interrupts and schedule work to handle
7525  * interrupt events.
7526  * Returns irq process return code.
7527  */
7528 int cik_irq_process(struct radeon_device *rdev)
7529 {
7530         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7531         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7532         u32 wptr;
7533         u32 rptr;
7534         u32 src_id, src_data, ring_id;
7535         u8 me_id, pipe_id, queue_id;
7536         u32 ring_index;
7537         bool queue_hotplug = false;
7538         bool queue_dp = false;
7539         bool queue_reset = false;
7540         u32 addr, status, mc_client;
7541         bool queue_thermal = false;
7542
7543         if (!rdev->ih.enabled || rdev->shutdown)
7544                 return IRQ_NONE;
7545
7546         wptr = cik_get_ih_wptr(rdev);
7547
7548 restart_ih:
7549         /* is somebody else already processing irqs? */
7550         if (atomic_xchg(&rdev->ih.lock, 1))
7551                 return IRQ_NONE;
7552
7553         rptr = rdev->ih.rptr;
7554         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7555
7556         /* Order reading of wptr vs. reading of IH ring data */
7557         rmb();
7558
7559         /* display interrupts */
7560         cik_irq_ack(rdev);
7561
7562         while (rptr != wptr) {
7563                 /* wptr/rptr are in bytes! */
7564                 ring_index = rptr / 4;
7565
7566                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7567                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7568                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7569
7570                 switch (src_id) {
7571                 case 1: /* D1 vblank/vline */
7572                         switch (src_data) {
7573                         case 0: /* D1 vblank */
7574                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7575                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7576
7577                                 if (rdev->irq.crtc_vblank_int[0]) {
7578                                         drm_handle_vblank(rdev->ddev, 0);
7579                                         rdev->pm.vblank_sync = true;
7580                                         wake_up(&rdev->irq.vblank_queue);
7581                                 }
7582                                 if (atomic_read(&rdev->irq.pflip[0]))
7583                                         radeon_crtc_handle_vblank(rdev, 0);
7584                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7585                                 DRM_DEBUG("IH: D1 vblank\n");
7586
7587                                 break;
7588                         case 1: /* D1 vline */
7589                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7590                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7591
7592                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7593                                 DRM_DEBUG("IH: D1 vline\n");
7594
7595                                 break;
7596                         default:
7597                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7598                                 break;
7599                         }
7600                         break;
7601                 case 2: /* D2 vblank/vline */
7602                         switch (src_data) {
7603                         case 0: /* D2 vblank */
7604                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7605                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7606
7607                                 if (rdev->irq.crtc_vblank_int[1]) {
7608                                         drm_handle_vblank(rdev->ddev, 1);
7609                                         rdev->pm.vblank_sync = true;
7610                                         wake_up(&rdev->irq.vblank_queue);
7611                                 }
7612                                 if (atomic_read(&rdev->irq.pflip[1]))
7613                                         radeon_crtc_handle_vblank(rdev, 1);
7614                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7615                                 DRM_DEBUG("IH: D2 vblank\n");
7616
7617                                 break;
7618                         case 1: /* D2 vline */
7619                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7620                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7621
7622                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7623                                 DRM_DEBUG("IH: D2 vline\n");
7624
7625                                 break;
7626                         default:
7627                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7628                                 break;
7629                         }
7630                         break;
7631                 case 3: /* D3 vblank/vline */
7632                         switch (src_data) {
7633                         case 0: /* D3 vblank */
7634                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7635                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7636
7637                                 if (rdev->irq.crtc_vblank_int[2]) {
7638                                         drm_handle_vblank(rdev->ddev, 2);
7639                                         rdev->pm.vblank_sync = true;
7640                                         wake_up(&rdev->irq.vblank_queue);
7641                                 }
7642                                 if (atomic_read(&rdev->irq.pflip[2]))
7643                                         radeon_crtc_handle_vblank(rdev, 2);
7644                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7645                                 DRM_DEBUG("IH: D3 vblank\n");
7646
7647                                 break;
7648                         case 1: /* D3 vline */
7649                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7650                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7651
7652                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7653                                 DRM_DEBUG("IH: D3 vline\n");
7654
7655                                 break;
7656                         default:
7657                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7658                                 break;
7659                         }
7660                         break;
7661                 case 4: /* D4 vblank/vline */
7662                         switch (src_data) {
7663                         case 0: /* D4 vblank */
7664                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7665                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7666
7667                                 if (rdev->irq.crtc_vblank_int[3]) {
7668                                         drm_handle_vblank(rdev->ddev, 3);
7669                                         rdev->pm.vblank_sync = true;
7670                                         wake_up(&rdev->irq.vblank_queue);
7671                                 }
7672                                 if (atomic_read(&rdev->irq.pflip[3]))
7673                                         radeon_crtc_handle_vblank(rdev, 3);
7674                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7675                                 DRM_DEBUG("IH: D4 vblank\n");
7676
7677                                 break;
7678                         case 1: /* D4 vline */
7679                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7680                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7681
7682                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7683                                 DRM_DEBUG("IH: D4 vline\n");
7684
7685                                 break;
7686                         default:
7687                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7688                                 break;
7689                         }
7690                         break;
7691                 case 5: /* D5 vblank/vline */
7692                         switch (src_data) {
7693                         case 0: /* D5 vblank */
7694                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7695                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7696
7697                                 if (rdev->irq.crtc_vblank_int[4]) {
7698                                         drm_handle_vblank(rdev->ddev, 4);
7699                                         rdev->pm.vblank_sync = true;
7700                                         wake_up(&rdev->irq.vblank_queue);
7701                                 }
7702                                 if (atomic_read(&rdev->irq.pflip[4]))
7703                                         radeon_crtc_handle_vblank(rdev, 4);
7704                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7705                                 DRM_DEBUG("IH: D5 vblank\n");
7706
7707                                 break;
7708                         case 1: /* D5 vline */
7709                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7710                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7711
7712                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7713                                 DRM_DEBUG("IH: D5 vline\n");
7714
7715                                 break;
7716                         default:
7717                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7718                                 break;
7719                         }
7720                         break;
7721                 case 6: /* D6 vblank/vline */
7722                         switch (src_data) {
7723                         case 0: /* D6 vblank */
7724                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7725                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7726
7727                                 if (rdev->irq.crtc_vblank_int[5]) {
7728                                         drm_handle_vblank(rdev->ddev, 5);
7729                                         rdev->pm.vblank_sync = true;
7730                                         wake_up(&rdev->irq.vblank_queue);
7731                                 }
7732                                 if (atomic_read(&rdev->irq.pflip[5]))
7733                                         radeon_crtc_handle_vblank(rdev, 5);
7734                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7735                                 DRM_DEBUG("IH: D6 vblank\n");
7736
7737                                 break;
7738                         case 1: /* D6 vline */
7739                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7740                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7741
7742                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7743                                 DRM_DEBUG("IH: D6 vline\n");
7744
7745                                 break;
7746                         default:
7747                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7748                                 break;
7749                         }
7750                         break;
7751                 case 8: /* D1 page flip */
7752                 case 10: /* D2 page flip */
7753                 case 12: /* D3 page flip */
7754                 case 14: /* D4 page flip */
7755                 case 16: /* D5 page flip */
7756                 case 18: /* D6 page flip */
7757                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7758                         if (radeon_use_pflipirq > 0)
7759                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7760                         break;
7761                 case 42: /* HPD hotplug */
7762                         switch (src_data) {
7763                         case 0:
7764                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7765                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7766
7767                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7768                                 queue_hotplug = true;
7769                                 DRM_DEBUG("IH: HPD1\n");
7770
7771                                 break;
7772                         case 1:
7773                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7774                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7775
7776                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7777                                 queue_hotplug = true;
7778                                 DRM_DEBUG("IH: HPD2\n");
7779
7780                                 break;
7781                         case 2:
7782                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7783                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7784
7785                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7786                                 queue_hotplug = true;
7787                                 DRM_DEBUG("IH: HPD3\n");
7788
7789                                 break;
7790                         case 3:
7791                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7792                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7793
7794                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7795                                 queue_hotplug = true;
7796                                 DRM_DEBUG("IH: HPD4\n");
7797
7798                                 break;
7799                         case 4:
7800                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7801                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7802
7803                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7804                                 queue_hotplug = true;
7805                                 DRM_DEBUG("IH: HPD5\n");
7806
7807                                 break;
7808                         case 5:
7809                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7810                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7811
7812                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7813                                 queue_hotplug = true;
7814                                 DRM_DEBUG("IH: HPD6\n");
7815
7816                                 break;
7817                         case 6:
7818                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7819                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7820
7821                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7822                                 queue_dp = true;
7823                                 DRM_DEBUG("IH: HPD_RX 1\n");
7824
7825                                 break;
7826                         case 7:
7827                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7828                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7829
7830                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7831                                 queue_dp = true;
7832                                 DRM_DEBUG("IH: HPD_RX 2\n");
7833
7834                                 break;
7835                         case 8:
7836                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7837                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7838
7839                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7840                                 queue_dp = true;
7841                                 DRM_DEBUG("IH: HPD_RX 3\n");
7842
7843                                 break;
7844                         case 9:
7845                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7846                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7847
7848                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7849                                 queue_dp = true;
7850                                 DRM_DEBUG("IH: HPD_RX 4\n");
7851
7852                                 break;
7853                         case 10:
7854                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7855                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7856
7857                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7858                                 queue_dp = true;
7859                                 DRM_DEBUG("IH: HPD_RX 5\n");
7860
7861                                 break;
7862                         case 11:
7863                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7864                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7865
7866                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7867                                 queue_dp = true;
7868                                 DRM_DEBUG("IH: HPD_RX 6\n");
7869
7870                                 break;
7871                         default:
7872                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7873                                 break;
7874                         }
7875                         break;
7876                 case 96:
7877                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7878                         WREG32(SRBM_INT_ACK, 0x1);
7879                         break;
7880                 case 124: /* UVD */
7881                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7882                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7883                         break;
7884                 case 146:
7885                 case 147:
7886                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7887                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7888                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7889                         /* reset addr and status */
7890                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7891                         if (addr == 0x0 && status == 0x0)
7892                                 break;
7893                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7894                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7895                                 addr);
7896                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7897                                 status);
7898                         cik_vm_decode_fault(rdev, status, addr, mc_client);
7899                         break;
7900                 case 167: /* VCE */
7901                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7902                         switch (src_data) {
7903                         case 0:
7904                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7905                                 break;
7906                         case 1:
7907                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7908                                 break;
7909                         default:
7910                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7911                                 break;
7912                         }
7913                         break;
7914                 case 176: /* GFX RB CP_INT */
7915                 case 177: /* GFX IB CP_INT */
7916                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7917                         break;
7918                 case 181: /* CP EOP event */
7919                         DRM_DEBUG("IH: CP EOP\n");
7920                         /* XXX check the bitfield order! */
7921                         me_id = (ring_id & 0x60) >> 5;
7922                         pipe_id = (ring_id & 0x18) >> 3;
7923                         queue_id = (ring_id & 0x7) >> 0;
7924                         switch (me_id) {
7925                         case 0:
7926                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7927                                 break;
7928                         case 1:
7929                         case 2:
7930                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7931                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7932                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7933                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7934                                 break;
7935                         }
7936                         break;
7937                 case 184: /* CP Privileged reg access */
7938                         DRM_ERROR("Illegal register access in command stream\n");
7939                         /* XXX check the bitfield order! */
7940                         me_id = (ring_id & 0x60) >> 5;
7941                         pipe_id = (ring_id & 0x18) >> 3;
7942                         queue_id = (ring_id & 0x7) >> 0;
7943                         switch (me_id) {
7944                         case 0:
7945                                 /* This results in a full GPU reset, but all we need to do is soft
7946                                  * reset the CP for gfx
7947                                  */
7948                                 queue_reset = true;
7949                                 break;
7950                         case 1:
7951                                 /* XXX compute */
7952                                 queue_reset = true;
7953                                 break;
7954                         case 2:
7955                                 /* XXX compute */
7956                                 queue_reset = true;
7957                                 break;
7958                         }
7959                         break;
7960                 case 185: /* CP Privileged inst */
7961                         DRM_ERROR("Illegal instruction in command stream\n");
7962                         /* XXX check the bitfield order! */
7963                         me_id = (ring_id & 0x60) >> 5;
7964                         pipe_id = (ring_id & 0x18) >> 3;
7965                         queue_id = (ring_id & 0x7) >> 0;
7966                         switch (me_id) {
7967                         case 0:
7968                                 /* This results in a full GPU reset, but all we need to do is soft
7969                                  * reset the CP for gfx
7970                                  */
7971                                 queue_reset = true;
7972                                 break;
7973                         case 1:
7974                                 /* XXX compute */
7975                                 queue_reset = true;
7976                                 break;
7977                         case 2:
7978                                 /* XXX compute */
7979                                 queue_reset = true;
7980                                 break;
7981                         }
7982                         break;
7983                 case 224: /* SDMA trap event */
7984                         /* XXX check the bitfield order! */
7985                         me_id = (ring_id & 0x3) >> 0;
7986                         queue_id = (ring_id & 0xc) >> 2;
7987                         DRM_DEBUG("IH: SDMA trap\n");
7988                         switch (me_id) {
7989                         case 0:
7990                                 switch (queue_id) {
7991                                 case 0:
7992                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7993                                         break;
7994                                 case 1:
7995                                         /* XXX compute */
7996                                         break;
7997                                 case 2:
7998                                         /* XXX compute */
7999                                         break;
8000                                 }
8001                                 break;
8002                         case 1:
8003                                 switch (queue_id) {
8004                                 case 0:
8005                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8006                                         break;
8007                                 case 1:
8008                                         /* XXX compute */
8009                                         break;
8010                                 case 2:
8011                                         /* XXX compute */
8012                                         break;
8013                                 }
8014                                 break;
8015                         }
8016                         break;
8017                 case 230: /* thermal low to high */
8018                         DRM_DEBUG("IH: thermal low to high\n");
8019                         rdev->pm.dpm.thermal.high_to_low = false;
8020                         queue_thermal = true;
8021                         break;
8022                 case 231: /* thermal high to low */
8023                         DRM_DEBUG("IH: thermal high to low\n");
8024                         rdev->pm.dpm.thermal.high_to_low = true;
8025                         queue_thermal = true;
8026                         break;
8027                 case 233: /* GUI IDLE */
8028                         DRM_DEBUG("IH: GUI idle\n");
8029                         break;
8030                 case 241: /* SDMA Privileged inst */
8031                 case 247: /* SDMA Privileged inst */
8032                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
8033                         /* XXX check the bitfield order! */
8034                         me_id = (ring_id & 0x3) >> 0;
8035                         queue_id = (ring_id & 0xc) >> 2;
8036                         switch (me_id) {
8037                         case 0:
8038                                 switch (queue_id) {
8039                                 case 0:
8040                                         queue_reset = true;
8041                                         break;
8042                                 case 1:
8043                                         /* XXX compute */
8044                                         queue_reset = true;
8045                                         break;
8046                                 case 2:
8047                                         /* XXX compute */
8048                                         queue_reset = true;
8049                                         break;
8050                                 }
8051                                 break;
8052                         case 1:
8053                                 switch (queue_id) {
8054                                 case 0:
8055                                         queue_reset = true;
8056                                         break;
8057                                 case 1:
8058                                         /* XXX compute */
8059                                         queue_reset = true;
8060                                         break;
8061                                 case 2:
8062                                         /* XXX compute */
8063                                         queue_reset = true;
8064                                         break;
8065                                 }
8066                                 break;
8067                         }
8068                         break;
8069                 default:
8070                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8071                         break;
8072                 }
8073
8074                 /* wptr/rptr are in bytes! */
8075                 rptr += 16;
8076                 rptr &= rdev->ih.ptr_mask;
8077                 WREG32(IH_RB_RPTR, rptr);
8078         }
8079         if (queue_dp)
8080                 schedule_work(&rdev->dp_work);
8081         if (queue_hotplug)
8082                 schedule_delayed_work(&rdev->hotplug_work, 0);
8083         if (queue_reset) {
8084                 rdev->needs_reset = true;
8085                 wake_up_all(&rdev->fence_queue);
8086         }
8087         if (queue_thermal)
8088                 schedule_work(&rdev->pm.dpm.thermal.work);
8089         rdev->ih.rptr = rptr;
8090         atomic_set(&rdev->ih.lock, 0);
8091
8092         /* make sure wptr hasn't changed while processing */
8093         wptr = cik_get_ih_wptr(rdev);
8094         if (wptr != rptr)
8095                 goto restart_ih;
8096
8097         return IRQ_HANDLED;
8098 }
8099
8100 /*
8101  * startup/shutdown callbacks
8102  */
8103 static void cik_uvd_init(struct radeon_device *rdev)
8104 {
8105         int r;
8106
8107         if (!rdev->has_uvd)
8108                 return;
8109
8110         r = radeon_uvd_init(rdev);
8111         if (r) {
8112                 dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8113                 /*
8114                  * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8115                  * to early fails cik_uvd_start() and thus nothing happens
8116                  * there. So it is pointless to try to go through that code
8117                  * hence why we disable uvd here.
8118                  */
8119                 rdev->has_uvd = 0;
8120                 return;
8121         }
8122         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8123         r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8124 }
8125
8126 static void cik_uvd_start(struct radeon_device *rdev)
8127 {
8128         int r;
8129
8130         if (!rdev->has_uvd)
8131                 return;
8132
8133         r = radeon_uvd_resume(rdev);
8134         if (r) {
8135                 dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8136                 goto error;
8137         }
8138         r = uvd_v4_2_resume(rdev);
8139         if (r) {
8140                 dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8141                 goto error;
8142         }
8143         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8144         if (r) {
8145                 dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8146                 goto error;
8147         }
8148         return;
8149
8150 error:
8151         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8152 }
8153
8154 static void cik_uvd_resume(struct radeon_device *rdev)
8155 {
8156         struct radeon_ring *ring;
8157         int r;
8158
8159         if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8160                 return;
8161
8162         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8163         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8164         if (r) {
8165                 dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8166                 return;
8167         }
8168         r = uvd_v1_0_init(rdev);
8169         if (r) {
8170                 dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8171                 return;
8172         }
8173 }
8174
8175 static void cik_vce_init(struct radeon_device *rdev)
8176 {
8177         int r;
8178
8179         if (!rdev->has_vce)
8180                 return;
8181
8182         r = radeon_vce_init(rdev);
8183         if (r) {
8184                 dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8185                 /*
8186                  * At this point rdev->vce.vcpu_bo is NULL which trickles down
8187                  * to early fails cik_vce_start() and thus nothing happens
8188                  * there. So it is pointless to try to go through that code
8189                  * hence why we disable vce here.
8190                  */
8191                 rdev->has_vce = 0;
8192                 return;
8193         }
8194         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8195         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8196         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8197         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8198 }
8199
8200 static void cik_vce_start(struct radeon_device *rdev)
8201 {
8202         int r;
8203
8204         if (!rdev->has_vce)
8205                 return;
8206
8207         r = radeon_vce_resume(rdev);
8208         if (r) {
8209                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8210                 goto error;
8211         }
8212         r = vce_v2_0_resume(rdev);
8213         if (r) {
8214                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8215                 goto error;
8216         }
8217         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8218         if (r) {
8219                 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8220                 goto error;
8221         }
8222         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8223         if (r) {
8224                 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8225                 goto error;
8226         }
8227         return;
8228
8229 error:
8230         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8231         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8232 }
8233
8234 static void cik_vce_resume(struct radeon_device *rdev)
8235 {
8236         struct radeon_ring *ring;
8237         int r;
8238
8239         if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8240                 return;
8241
8242         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8243         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8244         if (r) {
8245                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8246                 return;
8247         }
8248         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8249         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8250         if (r) {
8251                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8252                 return;
8253         }
8254         r = vce_v1_0_init(rdev);
8255         if (r) {
8256                 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8257                 return;
8258         }
8259 }
8260
/**
 * cik_startup - program the asic to a functional state
 *
 * @rdev: radeon_device pointer
 *
 * Programs the asic to a functional state (CIK).
 * Called by cik_init() and cik_resume().
 * The ordering below matters: scratch/MC/GART come before engines,
 * fence rings before IRQs, and IRQs before the CP/SDMA rings.
 * Returns 0 for success, error for failure.
 */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* dGPU only: load the MC ucode by hand unless DPM is enabled
	 * (presumably the SMC handles it in that case -- TODO confirm).
	 */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers; APUs need a save/restore register list,
	 * chosen per family (Kaveri vs. Kabini/Mullins) */
	if (rdev->flags & RADEON_IS_IGP) {
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start the fence driver on every ring we use: gfx, two compute,
	 * and both SDMA engines */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD/VCE failures are non-fatal; the helpers disable the blocks
	 * themselves on error */
	cik_uvd_start(rdev);
	cik_vce_start(rdev);

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* pick the nop packet for ring padding: older Hawaii ucode needs
	 * the legacy type-2 packet, everything else uses a type-3 NOP
	 * (presumably old Hawaii fw mishandles type-3 -- TODO confirm) */
	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* non-fatal; skipped internally if the start step failed */
	cik_uvd_resume(rdev);
	cik_vce_resume(rdev);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
8462
8463 /**
8464  * cik_resume - resume the asic to a functional state
8465  *
8466  * @rdev: radeon_device pointer
8467  *
8468  * Programs the asic to a functional state (CIK).
8469  * Called at resume.
8470  * Returns 0 for success, error for failure.
8471  */
8472 int cik_resume(struct radeon_device *rdev)
8473 {
8474         int r;
8475
8476         /* post card */
8477         atom_asic_init(rdev->mode_info.atom_context);
8478
8479         /* init golden registers */
8480         cik_init_golden_registers(rdev);
8481
8482         if (rdev->pm.pm_method == PM_METHOD_DPM)
8483                 radeon_pm_resume(rdev);
8484
8485         rdev->accel_working = true;
8486         r = cik_startup(rdev);
8487         if (r) {
8488                 DRM_ERROR("cik startup failed on resume\n");
8489                 rdev->accel_working = false;
8490                 return r;
8491         }
8492
8493         return r;
8494
8495 }
8496
/**
 * cik_suspend - suspend the asic
 *
 * @rdev: radeon_device pointer
 *
 * Bring the chip into a state suitable for suspend (CIK).
 * Called at suspend.  Tears down in roughly the reverse order of
 * cik_startup(): engines first, then power/clock gating, IRQs, and
 * finally writeback and GART.
 * Returns 0 for success.
 */
int cik_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* stop command submission before touching anything else */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_suspend(rdev);
	/* undo power/clock gating before disabling interrupts */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	/* writeback and GART go last, once the engines are quiet */
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8526
8527 /* Plan is to move initialization in that function and use
8528  * helper function so that radeon_device_init pretty much
8529  * do nothing more than calling asic specific function. This
8530  * should also allow to remove a bunch of callback function
8531  * like vram_info.
8532  */
8533 /**
8534  * cik_init - asic specific driver and hw init
8535  *
8536  * @rdev: radeon_device pointer
8537  *
8538  * Setup asic specific driver variables and program the hw
8539  * to a functional state (CIK).
8540  * Called at driver startup.
8541  * Returns 0 for success, errors for failure.
8542  */
8543 int cik_init(struct radeon_device *rdev)
8544 {
8545         struct radeon_ring *ring;
8546         int r;
8547
8548         /* Read BIOS */
8549         if (!radeon_get_bios(rdev)) {
8550                 if (ASIC_IS_AVIVO(rdev))
8551                         return -EINVAL;
8552         }
8553         /* Must be an ATOMBIOS */
8554         if (!rdev->is_atom_bios) {
8555                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8556                 return -EINVAL;
8557         }
8558         r = radeon_atombios_init(rdev);
8559         if (r)
8560                 return r;
8561
8562         /* Post card if necessary */
8563         if (!radeon_card_posted(rdev)) {
8564                 if (!rdev->bios) {
8565                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8566                         return -EINVAL;
8567                 }
8568                 DRM_INFO("GPU not posted. posting now...\n");
8569                 atom_asic_init(rdev->mode_info.atom_context);
8570         }
8571         /* init golden registers */
8572         cik_init_golden_registers(rdev);
8573         /* Initialize scratch registers */
8574         cik_scratch_init(rdev);
8575         /* Initialize surface registers */
8576         radeon_surface_init(rdev);
8577         /* Initialize clocks */
8578         radeon_get_clock_info(rdev->ddev);
8579
8580         /* Fence driver */
8581         r = radeon_fence_driver_init(rdev);
8582         if (r)
8583                 return r;
8584
8585         /* initialize memory controller */
8586         r = cik_mc_init(rdev);
8587         if (r)
8588                 return r;
8589         /* Memory manager */
8590         r = radeon_bo_init(rdev);
8591         if (r)
8592                 return r;
8593
8594         if (rdev->flags & RADEON_IS_IGP) {
8595                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8596                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8597                         r = cik_init_microcode(rdev);
8598                         if (r) {
8599                                 DRM_ERROR("Failed to load firmware!\n");
8600                                 return r;
8601                         }
8602                 }
8603         } else {
8604                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8605                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8606                     !rdev->mc_fw) {
8607                         r = cik_init_microcode(rdev);
8608                         if (r) {
8609                                 DRM_ERROR("Failed to load firmware!\n");
8610                                 return r;
8611                         }
8612                 }
8613         }
8614
8615         /* Initialize power management */
8616         radeon_pm_init(rdev);
8617
8618         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8619         ring->ring_obj = NULL;
8620         r600_ring_init(rdev, ring, 1024 * 1024);
8621
8622         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8623         ring->ring_obj = NULL;
8624         r600_ring_init(rdev, ring, 1024 * 1024);
8625         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8626         if (r)
8627                 return r;
8628
8629         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8630         ring->ring_obj = NULL;
8631         r600_ring_init(rdev, ring, 1024 * 1024);
8632         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8633         if (r)
8634                 return r;
8635
8636         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8637         ring->ring_obj = NULL;
8638         r600_ring_init(rdev, ring, 256 * 1024);
8639
8640         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8641         ring->ring_obj = NULL;
8642         r600_ring_init(rdev, ring, 256 * 1024);
8643
8644         cik_uvd_init(rdev);
8645         cik_vce_init(rdev);
8646
8647         rdev->ih.ring_obj = NULL;
8648         r600_ih_ring_init(rdev, 64 * 1024);
8649
8650         r = r600_pcie_gart_init(rdev);
8651         if (r)
8652                 return r;
8653
8654         rdev->accel_working = true;
8655         r = cik_startup(rdev);
8656         if (r) {
8657                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8658                 cik_cp_fini(rdev);
8659                 cik_sdma_fini(rdev);
8660                 cik_irq_fini(rdev);
8661                 sumo_rlc_fini(rdev);
8662                 cik_mec_fini(rdev);
8663                 radeon_wb_fini(rdev);
8664                 radeon_ib_pool_fini(rdev);
8665                 radeon_vm_manager_fini(rdev);
8666                 radeon_irq_kms_fini(rdev);
8667                 cik_pcie_gart_fini(rdev);
8668                 rdev->accel_working = false;
8669         }
8670
8671         /* Don't start up if the MC ucode is missing.
8672          * The default clocks and voltages before the MC ucode
8673          * is loaded are not suffient for advanced operations.
8674          */
8675         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8676                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8677                 return -EINVAL;
8678         }
8679
8680         return 0;
8681 }
8682
/**
 * cik_fini - asic specific driver and hw fini
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the asic specific driver variables and program the hw
 * to an idle state (CIK).
 * Called at driver unload.  The sequence mirrors the failure path in
 * cik_init()/cik_startup(): engines, then support structures, then
 * memory and BIOS state.
 */
void cik_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	/* shut down command processors first */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	/* multimedia blocks */
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	/* finally memory management, fences and BIOS data */
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
8718
8719 void dce8_program_fmt(struct drm_encoder *encoder)
8720 {
8721         struct drm_device *dev = encoder->dev;
8722         struct radeon_device *rdev = dev->dev_private;
8723         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8724         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8725         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8726         int bpc = 0;
8727         u32 tmp = 0;
8728         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8729
8730         if (connector) {
8731                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8732                 bpc = radeon_get_monitor_bpc(connector);
8733                 dither = radeon_connector->dither;
8734         }
8735
8736         /* LVDS/eDP FMT is set up by atom */
8737         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8738                 return;
8739
8740         /* not needed for analog */
8741         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8742             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8743                 return;
8744
8745         if (bpc == 0)
8746                 return;
8747
8748         switch (bpc) {
8749         case 6:
8750                 if (dither == RADEON_FMT_DITHER_ENABLE)
8751                         /* XXX sort out optimal dither settings */
8752                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8753                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8754                 else
8755                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8756                 break;
8757         case 8:
8758                 if (dither == RADEON_FMT_DITHER_ENABLE)
8759                         /* XXX sort out optimal dither settings */
8760                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8761                                 FMT_RGB_RANDOM_ENABLE |
8762                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8763                 else
8764                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8765                 break;
8766         case 10:
8767                 if (dither == RADEON_FMT_DITHER_ENABLE)
8768                         /* XXX sort out optimal dither settings */
8769                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8770                                 FMT_RGB_RANDOM_ENABLE |
8771                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8772                 else
8773                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8774                 break;
8775         default:
8776                 /* not needed */
8777                 break;
8778         }
8779
8780         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8781 }
8782
8783 /* display watermark setup */
8784 /**
8785  * dce8_line_buffer_adjust - Set up the line buffer
8786  *
8787  * @rdev: radeon_device pointer
8788  * @radeon_crtc: the selected display controller
8789  * @mode: the current display mode on the selected display
8790  * controller
8791  *
8792  * Setup up the line buffer allocation for
8793  * the selected display controller (CIK).
8794  * Returns the line buffer size in pixels.
8795  */
8796 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8797                                    struct radeon_crtc *radeon_crtc,
8798                                    struct drm_display_mode *mode)
8799 {
8800         u32 tmp, buffer_alloc, i;
8801         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8802         /*
8803          * Line Buffer Setup
8804          * There are 6 line buffers, one for each display controllers.
8805          * There are 3 partitions per LB. Select the number of partitions
8806          * to enable based on the display width.  For display widths larger
8807          * than 4096, you need use to use 2 display controllers and combine
8808          * them using the stereo blender.
8809          */
8810         if (radeon_crtc->base.enabled && mode) {
8811                 if (mode->crtc_hdisplay < 1920) {
8812                         tmp = 1;
8813                         buffer_alloc = 2;
8814                 } else if (mode->crtc_hdisplay < 2560) {
8815                         tmp = 2;
8816                         buffer_alloc = 2;
8817                 } else if (mode->crtc_hdisplay < 4096) {
8818                         tmp = 0;
8819                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8820                 } else {
8821                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8822                         tmp = 0;
8823                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8824                 }
8825         } else {
8826                 tmp = 1;
8827                 buffer_alloc = 0;
8828         }
8829
8830         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8831                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8832
8833         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8834                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8835         for (i = 0; i < rdev->usec_timeout; i++) {
8836                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8837                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8838                         break;
8839                 udelay(1);
8840         }
8841
8842         if (radeon_crtc->base.enabled && mode) {
8843                 switch (tmp) {
8844                 case 0:
8845                 default:
8846                         return 4096 * 2;
8847                 case 1:
8848                         return 1920 * 2;
8849                 case 2:
8850                         return 2560 * 2;
8851                 }
8852         }
8853
8854         /* controller not enabled, so no lb used */
8855         return 0;
8856 }
8857
8858 /**
8859  * cik_get_number_of_dram_channels - get the number of dram channels
8860  *
8861  * @rdev: radeon_device pointer
8862  *
8863  * Look up the number of video ram channels (CIK).
8864  * Used for display watermark bandwidth calculations
8865  * Returns the number of dram channels
8866  */
8867 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8868 {
8869         u32 tmp = RREG32(MC_SHARED_CHMAP);
8870
8871         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8872         case 0:
8873         default:
8874                 return 1;
8875         case 1:
8876                 return 2;
8877         case 2:
8878                 return 4;
8879         case 3:
8880                 return 8;
8881         case 4:
8882                 return 3;
8883         case 5:
8884                 return 6;
8885         case 6:
8886                 return 10;
8887         case 7:
8888                 return 12;
8889         case 8:
8890                 return 16;
8891         }
8892 }
8893
/* per-head input parameters for the DCE8 display watermark calculations */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8909
8910 /**
8911  * dce8_dram_bandwidth - get the dram bandwidth
8912  *
8913  * @wm: watermark calculation data
8914  *
8915  * Calculate the raw dram bandwidth (CIK).
8916  * Used for display watermark bandwidth calculations
8917  * Returns the dram bandwidth in MBytes/s
8918  */
8919 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8920 {
8921         /* Calculate raw DRAM Bandwidth */
8922         fixed20_12 dram_efficiency; /* 0.7 */
8923         fixed20_12 yclk, dram_channels, bandwidth;
8924         fixed20_12 a;
8925
8926         a.full = dfixed_const(1000);
8927         yclk.full = dfixed_const(wm->yclk);
8928         yclk.full = dfixed_div(yclk, a);
8929         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8930         a.full = dfixed_const(10);
8931         dram_efficiency.full = dfixed_const(7);
8932         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8933         bandwidth.full = dfixed_mul(dram_channels, yclk);
8934         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8935
8936         return dfixed_trunc(bandwidth);
8937 }
8938
8939 /**
8940  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8941  *
8942  * @wm: watermark calculation data
8943  *
8944  * Calculate the dram bandwidth used for display (CIK).
8945  * Used for display watermark bandwidth calculations
8946  * Returns the dram bandwidth for display in MBytes/s
8947  */
8948 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8949 {
8950         /* Calculate DRAM Bandwidth and the part allocated to display. */
8951         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8952         fixed20_12 yclk, dram_channels, bandwidth;
8953         fixed20_12 a;
8954
8955         a.full = dfixed_const(1000);
8956         yclk.full = dfixed_const(wm->yclk);
8957         yclk.full = dfixed_div(yclk, a);
8958         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8959         a.full = dfixed_const(10);
8960         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8961         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8962         bandwidth.full = dfixed_mul(dram_channels, yclk);
8963         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8964
8965         return dfixed_trunc(bandwidth);
8966 }
8967
8968 /**
8969  * dce8_data_return_bandwidth - get the data return bandwidth
8970  *
8971  * @wm: watermark calculation data
8972  *
8973  * Calculate the data return bandwidth used for display (CIK).
8974  * Used for display watermark bandwidth calculations
8975  * Returns the data return bandwidth in MBytes/s
8976  */
8977 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8978 {
8979         /* Calculate the display Data return Bandwidth */
8980         fixed20_12 return_efficiency; /* 0.8 */
8981         fixed20_12 sclk, bandwidth;
8982         fixed20_12 a;
8983
8984         a.full = dfixed_const(1000);
8985         sclk.full = dfixed_const(wm->sclk);
8986         sclk.full = dfixed_div(sclk, a);
8987         a.full = dfixed_const(10);
8988         return_efficiency.full = dfixed_const(8);
8989         return_efficiency.full = dfixed_div(return_efficiency, a);
8990         a.full = dfixed_const(32);
8991         bandwidth.full = dfixed_mul(a, sclk);
8992         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8993
8994         return dfixed_trunc(bandwidth);
8995 }
8996
8997 /**
8998  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8999  *
9000  * @wm: watermark calculation data
9001  *
9002  * Calculate the dmif bandwidth used for display (CIK).
9003  * Used for display watermark bandwidth calculations
9004  * Returns the dmif bandwidth in MBytes/s
9005  */
9006 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9007 {
9008         /* Calculate the DMIF Request Bandwidth */
9009         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9010         fixed20_12 disp_clk, bandwidth;
9011         fixed20_12 a, b;
9012
9013         a.full = dfixed_const(1000);
9014         disp_clk.full = dfixed_const(wm->disp_clk);
9015         disp_clk.full = dfixed_div(disp_clk, a);
9016         a.full = dfixed_const(32);
9017         b.full = dfixed_mul(a, disp_clk);
9018
9019         a.full = dfixed_const(10);
9020         disp_clk_request_efficiency.full = dfixed_const(8);
9021         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9022
9023         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9024
9025         return dfixed_trunc(bandwidth);
9026 }
9027
9028 /**
9029  * dce8_available_bandwidth - get the min available bandwidth
9030  *
9031  * @wm: watermark calculation data
9032  *
9033  * Calculate the min available bandwidth used for display (CIK).
9034  * Used for display watermark bandwidth calculations
9035  * Returns the min available bandwidth in MBytes/s
9036  */
9037 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9038 {
9039         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9040         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9041         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9042         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9043
9044         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9045 }
9046
9047 /**
9048  * dce8_average_bandwidth - get the average available bandwidth
9049  *
9050  * @wm: watermark calculation data
9051  *
9052  * Calculate the average available bandwidth used for display (CIK).
9053  * Used for display watermark bandwidth calculations
9054  * Returns the average available bandwidth in MBytes/s
9055  */
9056 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9057 {
9058         /* Calculate the display mode Average Bandwidth
9059          * DisplayMode should contain the source and destination dimensions,
9060          * timing, etc.
9061          */
9062         fixed20_12 bpp;
9063         fixed20_12 line_time;
9064         fixed20_12 src_width;
9065         fixed20_12 bandwidth;
9066         fixed20_12 a;
9067
9068         a.full = dfixed_const(1000);
9069         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9070         line_time.full = dfixed_div(line_time, a);
9071         bpp.full = dfixed_const(wm->bytes_per_pixel);
9072         src_width.full = dfixed_const(wm->src_width);
9073         bandwidth.full = dfixed_mul(src_width, bpp);
9074         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9075         bandwidth.full = dfixed_div(bandwidth, line_time);
9076
9077         return dfixed_trunc(bandwidth);
9078 }
9079
9080 /**
9081  * dce8_latency_watermark - get the latency watermark
9082  *
9083  * @wm: watermark calculation data
9084  *
9085  * Calculate the latency watermark (CIK).
9086  * Used for display watermark bandwidth calculations
9087  * Returns the latency watermark in ns
9088  */
9089 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9090 {
9091         /* First calculate the latency in ns */
9092         u32 mc_latency = 2000; /* 2000 ns. */
9093         u32 available_bandwidth = dce8_available_bandwidth(wm);
9094         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9095         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9096         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9097         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9098                 (wm->num_heads * cursor_line_pair_return_time);
9099         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9100         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9101         u32 tmp, dmif_size = 12288;
9102         fixed20_12 a, b, c;
9103
9104         if (wm->num_heads == 0)
9105                 return 0;
9106
9107         a.full = dfixed_const(2);
9108         b.full = dfixed_const(1);
9109         if ((wm->vsc.full > a.full) ||
9110             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9111             (wm->vtaps >= 5) ||
9112             ((wm->vsc.full >= a.full) && wm->interlaced))
9113                 max_src_lines_per_dst_line = 4;
9114         else
9115                 max_src_lines_per_dst_line = 2;
9116
9117         a.full = dfixed_const(available_bandwidth);
9118         b.full = dfixed_const(wm->num_heads);
9119         a.full = dfixed_div(a, b);
9120         tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
9121         tmp = min(dfixed_trunc(a), tmp);
9122
9123         lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
9124
9125         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9126         b.full = dfixed_const(1000);
9127         c.full = dfixed_const(lb_fill_bw);
9128         b.full = dfixed_div(c, b);
9129         a.full = dfixed_div(a, b);
9130         line_fill_time = dfixed_trunc(a);
9131
9132         if (line_fill_time < wm->active_time)
9133                 return latency;
9134         else
9135                 return latency + (line_fill_time - wm->active_time);
9136
9137 }
9138
9139 /**
9140  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9141  * average and available dram bandwidth
9142  *
9143  * @wm: watermark calculation data
9144  *
9145  * Check if the display average bandwidth fits in the display
9146  * dram bandwidth (CIK).
9147  * Used for display watermark bandwidth calculations
9148  * Returns true if the display fits, false if not.
9149  */
9150 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9151 {
9152         if (dce8_average_bandwidth(wm) <=
9153             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9154                 return true;
9155         else
9156                 return false;
9157 }
9158
9159 /**
9160  * dce8_average_bandwidth_vs_available_bandwidth - check
9161  * average and available bandwidth
9162  *
9163  * @wm: watermark calculation data
9164  *
9165  * Check if the display average bandwidth fits in the display
9166  * available bandwidth (CIK).
9167  * Used for display watermark bandwidth calculations
9168  * Returns true if the display fits, false if not.
9169  */
9170 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9171 {
9172         if (dce8_average_bandwidth(wm) <=
9173             (dce8_available_bandwidth(wm) / wm->num_heads))
9174                 return true;
9175         else
9176                 return false;
9177 }
9178
9179 /**
9180  * dce8_check_latency_hiding - check latency hiding
9181  *
9182  * @wm: watermark calculation data
9183  *
9184  * Check latency hiding (CIK).
9185  * Used for display watermark bandwidth calculations
9186  * Returns true if the display fits, false if not.
9187  */
9188 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9189 {
9190         u32 lb_partitions = wm->lb_size / wm->src_width;
9191         u32 line_time = wm->active_time + wm->blank_time;
9192         u32 latency_tolerant_lines;
9193         u32 latency_hiding;
9194         fixed20_12 a;
9195
9196         a.full = dfixed_const(1);
9197         if (wm->vsc.full > a.full)
9198                 latency_tolerant_lines = 1;
9199         else {
9200                 if (lb_partitions <= (wm->vtaps + 1))
9201                         latency_tolerant_lines = 1;
9202                 else
9203                         latency_tolerant_lines = 2;
9204         }
9205
9206         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9207
9208         if (dce8_latency_watermark(wm) <= latency_hiding)
9209                 return true;
9210         else
9211                 return false;
9212 }
9213
9214 /**
9215  * dce8_program_watermarks - program display watermarks
9216  *
9217  * @rdev: radeon_device pointer
9218  * @radeon_crtc: the selected display controller
9219  * @lb_size: line buffer size
9220  * @num_heads: number of display controllers in use
9221  *
9222  * Calculate and program the display watermarks for the
9223  * selected display controller (CIK).
9224  */
9225 static void dce8_program_watermarks(struct radeon_device *rdev,
9226                                     struct radeon_crtc *radeon_crtc,
9227                                     u32 lb_size, u32 num_heads)
9228 {
9229         struct drm_display_mode *mode = &radeon_crtc->base.mode;
9230         struct dce8_wm_params wm_low, wm_high;
9231         u32 active_time;
9232         u32 line_time = 0;
9233         u32 latency_watermark_a = 0, latency_watermark_b = 0;
9234         u32 tmp, wm_mask;
9235
9236         if (radeon_crtc->base.enabled && num_heads && mode) {
9237                 active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
9238                                             (u32)mode->clock);
9239                 line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
9240                                           (u32)mode->clock);
9241                 line_time = min(line_time, (u32)65535);
9242
9243                 /* watermark for high clocks */
9244                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9245                     rdev->pm.dpm_enabled) {
9246                         wm_high.yclk =
9247                                 radeon_dpm_get_mclk(rdev, false) * 10;
9248                         wm_high.sclk =
9249                                 radeon_dpm_get_sclk(rdev, false) * 10;
9250                 } else {
9251                         wm_high.yclk = rdev->pm.current_mclk * 10;
9252                         wm_high.sclk = rdev->pm.current_sclk * 10;
9253                 }
9254
9255                 wm_high.disp_clk = mode->clock;
9256                 wm_high.src_width = mode->crtc_hdisplay;
9257                 wm_high.active_time = active_time;
9258                 wm_high.blank_time = line_time - wm_high.active_time;
9259                 wm_high.interlaced = false;
9260                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9261                         wm_high.interlaced = true;
9262                 wm_high.vsc = radeon_crtc->vsc;
9263                 wm_high.vtaps = 1;
9264                 if (radeon_crtc->rmx_type != RMX_OFF)
9265                         wm_high.vtaps = 2;
9266                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9267                 wm_high.lb_size = lb_size;
9268                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9269                 wm_high.num_heads = num_heads;
9270
9271                 /* set for high clocks */
9272                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9273
9274                 /* possibly force display priority to high */
9275                 /* should really do this at mode validation time... */
9276                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9277                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9278                     !dce8_check_latency_hiding(&wm_high) ||
9279                     (rdev->disp_priority == 2)) {
9280                         DRM_DEBUG_KMS("force priority to high\n");
9281                 }
9282
9283                 /* watermark for low clocks */
9284                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9285                     rdev->pm.dpm_enabled) {
9286                         wm_low.yclk =
9287                                 radeon_dpm_get_mclk(rdev, true) * 10;
9288                         wm_low.sclk =
9289                                 radeon_dpm_get_sclk(rdev, true) * 10;
9290                 } else {
9291                         wm_low.yclk = rdev->pm.current_mclk * 10;
9292                         wm_low.sclk = rdev->pm.current_sclk * 10;
9293                 }
9294
9295                 wm_low.disp_clk = mode->clock;
9296                 wm_low.src_width = mode->crtc_hdisplay;
9297                 wm_low.active_time = active_time;
9298                 wm_low.blank_time = line_time - wm_low.active_time;
9299                 wm_low.interlaced = false;
9300                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9301                         wm_low.interlaced = true;
9302                 wm_low.vsc = radeon_crtc->vsc;
9303                 wm_low.vtaps = 1;
9304                 if (radeon_crtc->rmx_type != RMX_OFF)
9305                         wm_low.vtaps = 2;
9306                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9307                 wm_low.lb_size = lb_size;
9308                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9309                 wm_low.num_heads = num_heads;
9310
9311                 /* set for low clocks */
9312                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9313
9314                 /* possibly force display priority to high */
9315                 /* should really do this at mode validation time... */
9316                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9317                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9318                     !dce8_check_latency_hiding(&wm_low) ||
9319                     (rdev->disp_priority == 2)) {
9320                         DRM_DEBUG_KMS("force priority to high\n");
9321                 }
9322
9323                 /* Save number of lines the linebuffer leads before the scanout */
9324                 radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9325         }
9326
9327         /* select wm A */
9328         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9329         tmp = wm_mask;
9330         tmp &= ~LATENCY_WATERMARK_MASK(3);
9331         tmp |= LATENCY_WATERMARK_MASK(1);
9332         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9333         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9334                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9335                 LATENCY_HIGH_WATERMARK(line_time)));
9336         /* select wm B */
9337         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9338         tmp &= ~LATENCY_WATERMARK_MASK(3);
9339         tmp |= LATENCY_WATERMARK_MASK(2);
9340         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9341         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9342                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9343                 LATENCY_HIGH_WATERMARK(line_time)));
9344         /* restore original selection */
9345         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9346
9347         /* save values for DPM */
9348         radeon_crtc->line_time = line_time;
9349         radeon_crtc->wm_high = latency_watermark_a;
9350         radeon_crtc->wm_low = latency_watermark_b;
9351 }
9352
9353 /**
9354  * dce8_bandwidth_update - program display watermarks
9355  *
9356  * @rdev: radeon_device pointer
9357  *
9358  * Calculate and program the display watermarks and line
9359  * buffer allocation (CIK).
9360  */
9361 void dce8_bandwidth_update(struct radeon_device *rdev)
9362 {
9363         struct drm_display_mode *mode = NULL;
9364         u32 num_heads = 0, lb_size;
9365         int i;
9366
9367         if (!rdev->mode_info.mode_config_initialized)
9368                 return;
9369
9370         radeon_update_display_priority(rdev);
9371
9372         for (i = 0; i < rdev->num_crtc; i++) {
9373                 if (rdev->mode_info.crtcs[i]->base.enabled)
9374                         num_heads++;
9375         }
9376         for (i = 0; i < rdev->num_crtc; i++) {
9377                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9378                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9379                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9380         }
9381 }
9382
9383 /**
9384  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9385  *
9386  * @rdev: radeon_device pointer
9387  *
9388  * Fetches a GPU clock counter snapshot (SI).
9389  * Returns the 64 bit clock counter snapshot.
9390  */
9391 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9392 {
9393         uint64_t clock;
9394
9395         mutex_lock(&rdev->gpu_clock_mutex);
9396         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9397         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9398                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9399         mutex_unlock(&rdev->gpu_clock_mutex);
9400         return clock;
9401 }
9402
/**
 * cik_set_uvd_clock - program one UVD clock
 *
 * @rdev: radeon_device pointer
 * @clock: requested clock frequency (units per
 * radeon_atom_get_clock_dividers convention — presumably 10 kHz;
 * TODO confirm against callers)
 * @cntl_reg: SMC offset of the clock control register
 * @status_reg: SMC offset of the clock status register
 *
 * Look up dividers for the requested frequency, program the post
 * divider, and wait for the clock to report stable.
 * Returns 0 on success, -ETIMEDOUT if the clock does not settle,
 * or the error from radeon_atom_get_clock_dividers.
 */
static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
			     u32 cntl_reg, u32 status_reg)
{
	int r, i;
	struct atom_clock_dividers dividers;
	uint32_t tmp;

	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   clock, false, &dividers);
	if (r)
		return r;

	/* replace only the post divider bits, keep the rest of the reg */
	tmp = RREG32_SMC(cntl_reg);
	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(cntl_reg, tmp);

	/* wait up to 100 * 10ms for the clock to report stable */
	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(status_reg) & DCLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}
9430
9431 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9432 {
9433         int r = 0;
9434
9435         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9436         if (r)
9437                 return r;
9438
9439         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9440         return r;
9441 }
9442
/**
 * cik_set_vce_clocks - program the VCE clocks
 *
 * @rdev: radeon_device pointer
 * @evclk: requested evclk (currently unused — only ecclk is programmed here)
 * @ecclk: requested ecclk frequency
 *
 * Look up dividers for the requested ecclk, wait for the clock to be
 * stable, program the post divider, and wait for it to stabilize again.
 * Returns 0 on success, -ETIMEDOUT on timeout, or the error from
 * radeon_atom_get_clock_dividers.
 */
int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
{
	int r, i;
	struct atom_clock_dividers dividers;
	u32 tmp;

	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   ecclk, false, &dividers);
	if (r)
		return r;

	/* wait for the clock to be stable before touching the divider */
	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	/* replace only the post divider bits, keep the rest of the reg */
	tmp = RREG32_SMC(CG_ECLK_CNTL);
	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(CG_ECLK_CNTL, tmp);

	/* wait for the new clock setting to stabilize */
	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}
9477
9478 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9479 {
9480         struct pci_dev *root = rdev->pdev->bus->self;
9481         int bridge_pos, gpu_pos;
9482         u32 speed_cntl, mask, current_data_rate;
9483         int ret, i;
9484         u16 tmp16;
9485
9486         if (pci_is_root_bus(rdev->pdev->bus))
9487                 return;
9488
9489         if (radeon_pcie_gen2 == 0)
9490                 return;
9491
9492         if (rdev->flags & RADEON_IS_IGP)
9493                 return;
9494
9495         if (!(rdev->flags & RADEON_IS_PCIE))
9496                 return;
9497
9498         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9499         if (ret != 0)
9500                 return;
9501
9502         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9503                 return;
9504
9505         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9506         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9507                 LC_CURRENT_DATA_RATE_SHIFT;
9508         if (mask & DRM_PCIE_SPEED_80) {
9509                 if (current_data_rate == 2) {
9510                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9511                         return;
9512                 }
9513                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9514         } else if (mask & DRM_PCIE_SPEED_50) {
9515                 if (current_data_rate == 1) {
9516                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9517                         return;
9518                 }
9519                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9520         }
9521
9522         bridge_pos = pci_pcie_cap(root);
9523         if (!bridge_pos)
9524                 return;
9525
9526         gpu_pos = pci_pcie_cap(rdev->pdev);
9527         if (!gpu_pos)
9528                 return;
9529
9530         if (mask & DRM_PCIE_SPEED_80) {
9531                 /* re-try equalization if gen3 is not already enabled */
9532                 if (current_data_rate != 2) {
9533                         u16 bridge_cfg, gpu_cfg;
9534                         u16 bridge_cfg2, gpu_cfg2;
9535                         u32 max_lw, current_lw, tmp;
9536
9537                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9538                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9539
9540                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9541                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9542
9543                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9544                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9545
9546                         tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9547                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9548                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9549
9550                         if (current_lw < max_lw) {
9551                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9552                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9553                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9554                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9555                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9556                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9557                                 }
9558                         }
9559
9560                         for (i = 0; i < 10; i++) {
9561                                 /* check status */
9562                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9563                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9564                                         break;
9565
9566                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9567                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9568
9569                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9570                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9571
9572                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9573                                 tmp |= LC_SET_QUIESCE;
9574                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9575
9576                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9577                                 tmp |= LC_REDO_EQ;
9578                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9579
9580                                 mdelay(100);
9581
9582                                 /* linkctl */
9583                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9584                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9585                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9586                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9587
9588                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9589                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9590                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9591                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9592
9593                                 /* linkctl2 */
9594                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9595                                 tmp16 &= ~((1 << 4) | (7 << 9));
9596                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9597                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9598
9599                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9600                                 tmp16 &= ~((1 << 4) | (7 << 9));
9601                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9602                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9603
9604                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9605                                 tmp &= ~LC_SET_QUIESCE;
9606                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9607                         }
9608                 }
9609         }
9610
9611         /* set the link speed */
9612         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9613         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9614         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9615
9616         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9617         tmp16 &= ~0xf;
9618         if (mask & DRM_PCIE_SPEED_80)
9619                 tmp16 |= 3; /* gen3 */
9620         else if (mask & DRM_PCIE_SPEED_50)
9621                 tmp16 |= 2; /* gen2 */
9622         else
9623                 tmp16 |= 1; /* gen1 */
9624         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9625
9626         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9627         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9628         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9629
9630         for (i = 0; i < rdev->usec_timeout; i++) {
9631                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9632                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9633                         break;
9634                 udelay(1);
9635         }
9636 }
9637
/**
 * cik_program_aspm - configure PCIe ASPM for CIK parts
 *
 * @rdev: radeon_device pointer
 *
 * Programs the PCIE-port and SMC registers that control Active State
 * Power Management: L0s/L1 inactivity timers, PLL power-down while the
 * link is in L1, dynamic lane power states, and (when the upstream
 * bridge advertises clock power management) switching several internal
 * clocks so the reference clock can be gated via CLKREQ#.
 *
 * Does nothing when the radeon.aspm module parameter is 0, on IGPs,
 * or on non-PCIE devices.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
        u32 data, orig;
        /* Local policy knobs; all features are currently left enabled.
         * Kept as variables so individual features are easy to disable
         * while debugging.
         */
        bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
        bool disable_clkreq = false;

        if (radeon_aspm == 0)
                return;

        /* XXX double check IGPs */
        if (rdev->flags & RADEON_IS_IGP)
                return;

        if (!(rdev->flags & RADEON_IS_PCIE))
                return;

        /* Override the number of fast training sequences (N_FTS)
         * transmitted, advertising 0x24.
         */
        orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
        data &= ~LC_XMIT_N_FTS_MASK;
        data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
        if (orig != data)
                WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

        orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
        data |= LC_GO_TO_RECOVERY;
        if (orig != data)
                WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

        /* Ignore EDB (end of data block) errors on the port. */
        orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
        data |= P_IGNORE_EDB_ERR;
        if (orig != data)
                WREG32_PCIE_PORT(PCIE_P_CNTL, data);

        /* Set up the L0s/L1 inactivity timers.  A timer is only armed
         * (non-zero) for the states we intend to allow.
         */
        orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
        data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
        data |= LC_PMI_TO_L1_DIS;
        if (!disable_l0s)
                data |= LC_L0S_INACTIVITY(7);

        if (!disable_l1) {
                /* Allow L1 entry and let PMI transition to L1. */
                data |= LC_L1_INACTIVITY(7);
                data &= ~LC_PMI_TO_L1_DIS;
                if (orig != data)
                        WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

                if (!disable_plloff_in_l1) {
                        bool clk_req_support;

                        /* Program the PLL power state to use while the
                         * link is in the off/TXS2 states, for both PIF
                         * blocks (PB0/PB1), lanes 0 and 1.
                         */
                        orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
                        data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
                        data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
                        if (orig != data)
                                WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

                        orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
                        data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
                        data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
                        if (orig != data)
                                WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

                        orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
                        data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
                        data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
                        if (orig != data)
                                WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

                        orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
                        data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
                        data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
                        if (orig != data)
                                WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

                        /* Enable dynamic lane power state management. */
                        orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
                        data &= ~LC_DYN_LANES_PWR_STATE_MASK;
                        data |= LC_DYN_LANES_PWR_STATE(3);
                        if (orig != data)
                                WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

                        /* CLKREQ# support: only usable if the upstream
                         * bridge advertises clock power management in
                         * its link capabilities.  A root-bus device has
                         * no upstream bridge to query.
                         */
                        if (!disable_clkreq &&
                            !pci_is_root_bus(rdev->pdev->bus)) {
                                struct pci_dev *root = rdev->pdev->bus->self;
                                u32 lnkcap;

                                clk_req_support = false;
                                pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
                                if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
                                        clk_req_support = true;
                        } else {
                                clk_req_support = false;
                        }

                        if (clk_req_support) {
                                /* Allow power-down while in L1/L2-L3. */
                                orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
                                data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
                                if (orig != data)
                                        WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

                                /* Reroute the clocks below to alternate
                                 * sources so they keep running when the
                                 * reference clock is gated via CLKREQ#.
                                 * NOTE(review): exact source meanings of
                                 * the select values come from the CIK
                                 * register spec — not verifiable here.
                                 */
                                orig = data = RREG32_SMC(THM_CLK_CNTL);
                                data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
                                data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
                                if (orig != data)
                                        WREG32_SMC(THM_CLK_CNTL, data);

                                orig = data = RREG32_SMC(MISC_CLK_CTRL);
                                data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
                                data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
                                if (orig != data)
                                        WREG32_SMC(MISC_CLK_CTRL, data);

                                orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
                                data &= ~BCLK_AS_XCLK;
                                if (orig != data)
                                        WREG32_SMC(CG_CLKPIN_CNTL, data);

                                orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
                                data &= ~FORCE_BIF_REFCLK_EN;
                                if (orig != data)
                                        WREG32_SMC(CG_CLKPIN_CNTL_2, data);

                                orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
                                data &= ~MPLL_CLKOUT_SEL_MASK;
                                data |= MPLL_CLKOUT_SEL(4);
                                if (orig != data)
                                        WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
                        }
                }
        } else {
                /* L1 disabled: commit the L0s-only settings built above. */
                if (orig != data)
                        WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
        }

        /* Enable light sleep for the BIF slave/master/replay memories. */
        orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
        data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
        if (orig != data)
                WREG32_PCIE_PORT(PCIE_CNTL2, data);

        if (!disable_l0s) {
                /* If the received N_FTS field is saturated and the link
                 * was reverse-negotiated in both directions, clear the
                 * L0s inactivity timer again — presumably backing out
                 * L0s entry for this link configuration (TODO: confirm
                 * against the CIK register spec).
                 */
                data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
                if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
                        data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
                        if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
                                orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
                                data &= ~LC_L0S_INACTIVITY_MASK;
                                if (orig != data)
                                        WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
                        }
                }
        }
}