Merge branch 'old.dcache' into work.dcache
[sfrench/cifs-2.6.git] / drivers / gpu / drm / radeon / cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36
/* Default SH_MEM_CONFIG value for graphics: unaligned shader memory
 * accesses are permitted (ALIGNMENT_MODE_UNALIGNED).
 */
#define SH_MEM_CONFIG_GFX_DEFAULT \
	ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
39
/*
 * Microcode images required by the CIK chips.  Each chip is declared
 * twice: once with the legacy uppercase file names and once with the
 * lowercase names (presumably the newer packaging scheme — both sets
 * are declared so module autoloading can find whichever is installed;
 * TODO confirm loader precedence in the fetch code).
 */

/* Bonaire (legacy names) */
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

/* Bonaire (lowercase names) */
MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");
MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");

/* Hawaii (legacy names) */
MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

/* Hawaii (lowercase names) */
MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");
MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");

/* Kaveri (legacy names; APU — no mc/smc images) */
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

/* Kaveri (lowercase names) */
MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

/* Kabini (legacy names) */
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

/* Kabini (lowercase names) */
MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

/* Mullins (legacy names) */
MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

/* Mullins (lowercase names) */
MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");
122
/*
 * Helpers shared with other ASIC files (r600, evergreen, sumo, si, vce,
 * cik_sdma) plus forward declarations of static functions defined later
 * in this file.
 */
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);
147
148 /**
149  * cik_get_allowed_info_register - fetch the register for the info ioctl
150  *
151  * @rdev: radeon_device pointer
152  * @reg: register offset in bytes
153  * @val: register value
154  *
155  * Returns 0 for success or -EINVAL for an invalid register
156  *
157  */
158 int cik_get_allowed_info_register(struct radeon_device *rdev,
159                                   u32 reg, u32 *val)
160 {
161         switch (reg) {
162         case GRBM_STATUS:
163         case GRBM_STATUS2:
164         case GRBM_STATUS_SE0:
165         case GRBM_STATUS_SE1:
166         case GRBM_STATUS_SE2:
167         case GRBM_STATUS_SE3:
168         case SRBM_STATUS:
169         case SRBM_STATUS2:
170         case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
171         case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
172         case UVD_STATUS:
173         /* TODO VCE */
174                 *val = RREG32(reg);
175                 return 0;
176         default:
177                 return -EINVAL;
178         }
179 }
180
/*
 * Indirect registers accessor (DIDT register space)
 */
184 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
185 {
186         unsigned long flags;
187         u32 r;
188
189         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
190         WREG32(CIK_DIDT_IND_INDEX, (reg));
191         r = RREG32(CIK_DIDT_IND_DATA);
192         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
193         return r;
194 }
195
196 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
197 {
198         unsigned long flags;
199
200         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
201         WREG32(CIK_DIDT_IND_INDEX, (reg));
202         WREG32(CIK_DIDT_IND_DATA, (v));
203         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
204 }
205
206 /* get temperature in millidegrees */
207 int ci_get_temp(struct radeon_device *rdev)
208 {
209         u32 temp;
210         int actual_temp = 0;
211
212         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
213                 CTF_TEMP_SHIFT;
214
215         if (temp & 0x200)
216                 actual_temp = 255;
217         else
218                 actual_temp = temp & 0x1ff;
219
220         actual_temp = actual_temp * 1000;
221
222         return actual_temp;
223 }
224
225 /* get temperature in millidegrees */
226 int kv_get_temp(struct radeon_device *rdev)
227 {
228         u32 temp;
229         int actual_temp = 0;
230
231         temp = RREG32_SMC(0xC0300E0C);
232
233         if (temp)
234                 actual_temp = (temp / 8) - 49;
235         else
236                 actual_temp = 0;
237
238         actual_temp = actual_temp * 1000;
239
240         return actual_temp;
241 }
242
/*
 * Indirect registers accessor (PCIE port register space)
 */
246 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
247 {
248         unsigned long flags;
249         u32 r;
250
251         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
252         WREG32(PCIE_INDEX, reg);
253         (void)RREG32(PCIE_INDEX);
254         r = RREG32(PCIE_DATA);
255         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
256         return r;
257 }
258
259 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
260 {
261         unsigned long flags;
262
263         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
264         WREG32(PCIE_INDEX, reg);
265         (void)RREG32(PCIE_INDEX);
266         WREG32(PCIE_DATA, v);
267         (void)RREG32(PCIE_DATA);
268         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
269 }
270
/*
 * RLC save/restore register list for Spectre (Kaveri-class) parts.
 * NOTE(review): most entries appear to be pairs of
 * ((instance/broadcast select << 16) | (dword register offset)) followed
 * by a zero-initialized save slot; the bare small values (0x3, 0x5 below)
 * presumably introduce a group of that many consecutive entries -- TODO
 * confirm against the RLC microcode list format.  Data must not be
 * reordered or edited without hardware documentation.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	/* NOTE(review): bare count -- presumably starts a group of 3 */
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	/* NOTE(review): bare count -- presumably starts a group of 5;
	 * unlike the entries above, no zero save slots follow here
	 */
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
717
718 static const u32 kalindi_rlc_save_restore_register_list[] =
719 {
720         (0x0e00 << 16) | (0xc12c >> 2),
721         0x00000000,
722         (0x0e00 << 16) | (0xc140 >> 2),
723         0x00000000,
724         (0x0e00 << 16) | (0xc150 >> 2),
725         0x00000000,
726         (0x0e00 << 16) | (0xc15c >> 2),
727         0x00000000,
728         (0x0e00 << 16) | (0xc168 >> 2),
729         0x00000000,
730         (0x0e00 << 16) | (0xc170 >> 2),
731         0x00000000,
732         (0x0e00 << 16) | (0xc204 >> 2),
733         0x00000000,
734         (0x0e00 << 16) | (0xc2b4 >> 2),
735         0x00000000,
736         (0x0e00 << 16) | (0xc2b8 >> 2),
737         0x00000000,
738         (0x0e00 << 16) | (0xc2bc >> 2),
739         0x00000000,
740         (0x0e00 << 16) | (0xc2c0 >> 2),
741         0x00000000,
742         (0x0e00 << 16) | (0x8228 >> 2),
743         0x00000000,
744         (0x0e00 << 16) | (0x829c >> 2),
745         0x00000000,
746         (0x0e00 << 16) | (0x869c >> 2),
747         0x00000000,
748         (0x0600 << 16) | (0x98f4 >> 2),
749         0x00000000,
750         (0x0e00 << 16) | (0x98f8 >> 2),
751         0x00000000,
752         (0x0e00 << 16) | (0x9900 >> 2),
753         0x00000000,
754         (0x0e00 << 16) | (0xc260 >> 2),
755         0x00000000,
756         (0x0e00 << 16) | (0x90e8 >> 2),
757         0x00000000,
758         (0x0e00 << 16) | (0x3c000 >> 2),
759         0x00000000,
760         (0x0e00 << 16) | (0x3c00c >> 2),
761         0x00000000,
762         (0x0e00 << 16) | (0x8c1c >> 2),
763         0x00000000,
764         (0x0e00 << 16) | (0x9700 >> 2),
765         0x00000000,
766         (0x0e00 << 16) | (0xcd20 >> 2),
767         0x00000000,
768         (0x4e00 << 16) | (0xcd20 >> 2),
769         0x00000000,
770         (0x5e00 << 16) | (0xcd20 >> 2),
771         0x00000000,
772         (0x6e00 << 16) | (0xcd20 >> 2),
773         0x00000000,
774         (0x7e00 << 16) | (0xcd20 >> 2),
775         0x00000000,
776         (0x0e00 << 16) | (0x89bc >> 2),
777         0x00000000,
778         (0x0e00 << 16) | (0x8900 >> 2),
779         0x00000000,
780         0x3,
781         (0x0e00 << 16) | (0xc130 >> 2),
782         0x00000000,
783         (0x0e00 << 16) | (0xc134 >> 2),
784         0x00000000,
785         (0x0e00 << 16) | (0xc1fc >> 2),
786         0x00000000,
787         (0x0e00 << 16) | (0xc208 >> 2),
788         0x00000000,
789         (0x0e00 << 16) | (0xc264 >> 2),
790         0x00000000,
791         (0x0e00 << 16) | (0xc268 >> 2),
792         0x00000000,
793         (0x0e00 << 16) | (0xc26c >> 2),
794         0x00000000,
795         (0x0e00 << 16) | (0xc270 >> 2),
796         0x00000000,
797         (0x0e00 << 16) | (0xc274 >> 2),
798         0x00000000,
799         (0x0e00 << 16) | (0xc28c >> 2),
800         0x00000000,
801         (0x0e00 << 16) | (0xc290 >> 2),
802         0x00000000,
803         (0x0e00 << 16) | (0xc294 >> 2),
804         0x00000000,
805         (0x0e00 << 16) | (0xc298 >> 2),
806         0x00000000,
807         (0x0e00 << 16) | (0xc2a0 >> 2),
808         0x00000000,
809         (0x0e00 << 16) | (0xc2a4 >> 2),
810         0x00000000,
811         (0x0e00 << 16) | (0xc2a8 >> 2),
812         0x00000000,
813         (0x0e00 << 16) | (0xc2ac >> 2),
814         0x00000000,
815         (0x0e00 << 16) | (0x301d0 >> 2),
816         0x00000000,
817         (0x0e00 << 16) | (0x30238 >> 2),
818         0x00000000,
819         (0x0e00 << 16) | (0x30250 >> 2),
820         0x00000000,
821         (0x0e00 << 16) | (0x30254 >> 2),
822         0x00000000,
823         (0x0e00 << 16) | (0x30258 >> 2),
824         0x00000000,
825         (0x0e00 << 16) | (0x3025c >> 2),
826         0x00000000,
827         (0x4e00 << 16) | (0xc900 >> 2),
828         0x00000000,
829         (0x5e00 << 16) | (0xc900 >> 2),
830         0x00000000,
831         (0x6e00 << 16) | (0xc900 >> 2),
832         0x00000000,
833         (0x7e00 << 16) | (0xc900 >> 2),
834         0x00000000,
835         (0x4e00 << 16) | (0xc904 >> 2),
836         0x00000000,
837         (0x5e00 << 16) | (0xc904 >> 2),
838         0x00000000,
839         (0x6e00 << 16) | (0xc904 >> 2),
840         0x00000000,
841         (0x7e00 << 16) | (0xc904 >> 2),
842         0x00000000,
843         (0x4e00 << 16) | (0xc908 >> 2),
844         0x00000000,
845         (0x5e00 << 16) | (0xc908 >> 2),
846         0x00000000,
847         (0x6e00 << 16) | (0xc908 >> 2),
848         0x00000000,
849         (0x7e00 << 16) | (0xc908 >> 2),
850         0x00000000,
851         (0x4e00 << 16) | (0xc90c >> 2),
852         0x00000000,
853         (0x5e00 << 16) | (0xc90c >> 2),
854         0x00000000,
855         (0x6e00 << 16) | (0xc90c >> 2),
856         0x00000000,
857         (0x7e00 << 16) | (0xc90c >> 2),
858         0x00000000,
859         (0x4e00 << 16) | (0xc910 >> 2),
860         0x00000000,
861         (0x5e00 << 16) | (0xc910 >> 2),
862         0x00000000,
863         (0x6e00 << 16) | (0xc910 >> 2),
864         0x00000000,
865         (0x7e00 << 16) | (0xc910 >> 2),
866         0x00000000,
867         (0x0e00 << 16) | (0xc99c >> 2),
868         0x00000000,
869         (0x0e00 << 16) | (0x9834 >> 2),
870         0x00000000,
871         (0x0000 << 16) | (0x30f00 >> 2),
872         0x00000000,
873         (0x0000 << 16) | (0x30f04 >> 2),
874         0x00000000,
875         (0x0000 << 16) | (0x30f08 >> 2),
876         0x00000000,
877         (0x0000 << 16) | (0x30f0c >> 2),
878         0x00000000,
879         (0x0600 << 16) | (0x9b7c >> 2),
880         0x00000000,
881         (0x0e00 << 16) | (0x8a14 >> 2),
882         0x00000000,
883         (0x0e00 << 16) | (0x8a18 >> 2),
884         0x00000000,
885         (0x0600 << 16) | (0x30a00 >> 2),
886         0x00000000,
887         (0x0e00 << 16) | (0x8bf0 >> 2),
888         0x00000000,
889         (0x0e00 << 16) | (0x8bcc >> 2),
890         0x00000000,
891         (0x0e00 << 16) | (0x8b24 >> 2),
892         0x00000000,
893         (0x0e00 << 16) | (0x30a04 >> 2),
894         0x00000000,
895         (0x0600 << 16) | (0x30a10 >> 2),
896         0x00000000,
897         (0x0600 << 16) | (0x30a14 >> 2),
898         0x00000000,
899         (0x0600 << 16) | (0x30a18 >> 2),
900         0x00000000,
901         (0x0600 << 16) | (0x30a2c >> 2),
902         0x00000000,
903         (0x0e00 << 16) | (0xc700 >> 2),
904         0x00000000,
905         (0x0e00 << 16) | (0xc704 >> 2),
906         0x00000000,
907         (0x0e00 << 16) | (0xc708 >> 2),
908         0x00000000,
909         (0x0e00 << 16) | (0xc768 >> 2),
910         0x00000000,
911         (0x0400 << 16) | (0xc770 >> 2),
912         0x00000000,
913         (0x0400 << 16) | (0xc774 >> 2),
914         0x00000000,
915         (0x0400 << 16) | (0xc798 >> 2),
916         0x00000000,
917         (0x0400 << 16) | (0xc79c >> 2),
918         0x00000000,
919         (0x0e00 << 16) | (0x9100 >> 2),
920         0x00000000,
921         (0x0e00 << 16) | (0x3c010 >> 2),
922         0x00000000,
923         (0x0e00 << 16) | (0x8c00 >> 2),
924         0x00000000,
925         (0x0e00 << 16) | (0x8c04 >> 2),
926         0x00000000,
927         (0x0e00 << 16) | (0x8c20 >> 2),
928         0x00000000,
929         (0x0e00 << 16) | (0x8c38 >> 2),
930         0x00000000,
931         (0x0e00 << 16) | (0x8c3c >> 2),
932         0x00000000,
933         (0x0e00 << 16) | (0xae00 >> 2),
934         0x00000000,
935         (0x0e00 << 16) | (0x9604 >> 2),
936         0x00000000,
937         (0x0e00 << 16) | (0xac08 >> 2),
938         0x00000000,
939         (0x0e00 << 16) | (0xac0c >> 2),
940         0x00000000,
941         (0x0e00 << 16) | (0xac10 >> 2),
942         0x00000000,
943         (0x0e00 << 16) | (0xac14 >> 2),
944         0x00000000,
945         (0x0e00 << 16) | (0xac58 >> 2),
946         0x00000000,
947         (0x0e00 << 16) | (0xac68 >> 2),
948         0x00000000,
949         (0x0e00 << 16) | (0xac6c >> 2),
950         0x00000000,
951         (0x0e00 << 16) | (0xac70 >> 2),
952         0x00000000,
953         (0x0e00 << 16) | (0xac74 >> 2),
954         0x00000000,
955         (0x0e00 << 16) | (0xac78 >> 2),
956         0x00000000,
957         (0x0e00 << 16) | (0xac7c >> 2),
958         0x00000000,
959         (0x0e00 << 16) | (0xac80 >> 2),
960         0x00000000,
961         (0x0e00 << 16) | (0xac84 >> 2),
962         0x00000000,
963         (0x0e00 << 16) | (0xac88 >> 2),
964         0x00000000,
965         (0x0e00 << 16) | (0xac8c >> 2),
966         0x00000000,
967         (0x0e00 << 16) | (0x970c >> 2),
968         0x00000000,
969         (0x0e00 << 16) | (0x9714 >> 2),
970         0x00000000,
971         (0x0e00 << 16) | (0x9718 >> 2),
972         0x00000000,
973         (0x0e00 << 16) | (0x971c >> 2),
974         0x00000000,
975         (0x0e00 << 16) | (0x31068 >> 2),
976         0x00000000,
977         (0x4e00 << 16) | (0x31068 >> 2),
978         0x00000000,
979         (0x5e00 << 16) | (0x31068 >> 2),
980         0x00000000,
981         (0x6e00 << 16) | (0x31068 >> 2),
982         0x00000000,
983         (0x7e00 << 16) | (0x31068 >> 2),
984         0x00000000,
985         (0x0e00 << 16) | (0xcd10 >> 2),
986         0x00000000,
987         (0x0e00 << 16) | (0xcd14 >> 2),
988         0x00000000,
989         (0x0e00 << 16) | (0x88b0 >> 2),
990         0x00000000,
991         (0x0e00 << 16) | (0x88b4 >> 2),
992         0x00000000,
993         (0x0e00 << 16) | (0x88b8 >> 2),
994         0x00000000,
995         (0x0e00 << 16) | (0x88bc >> 2),
996         0x00000000,
997         (0x0400 << 16) | (0x89c0 >> 2),
998         0x00000000,
999         (0x0e00 << 16) | (0x88c4 >> 2),
1000         0x00000000,
1001         (0x0e00 << 16) | (0x88c8 >> 2),
1002         0x00000000,
1003         (0x0e00 << 16) | (0x88d0 >> 2),
1004         0x00000000,
1005         (0x0e00 << 16) | (0x88d4 >> 2),
1006         0x00000000,
1007         (0x0e00 << 16) | (0x88d8 >> 2),
1008         0x00000000,
1009         (0x0e00 << 16) | (0x8980 >> 2),
1010         0x00000000,
1011         (0x0e00 << 16) | (0x30938 >> 2),
1012         0x00000000,
1013         (0x0e00 << 16) | (0x3093c >> 2),
1014         0x00000000,
1015         (0x0e00 << 16) | (0x30940 >> 2),
1016         0x00000000,
1017         (0x0e00 << 16) | (0x89a0 >> 2),
1018         0x00000000,
1019         (0x0e00 << 16) | (0x30900 >> 2),
1020         0x00000000,
1021         (0x0e00 << 16) | (0x30904 >> 2),
1022         0x00000000,
1023         (0x0e00 << 16) | (0x89b4 >> 2),
1024         0x00000000,
1025         (0x0e00 << 16) | (0x3e1fc >> 2),
1026         0x00000000,
1027         (0x0e00 << 16) | (0x3c210 >> 2),
1028         0x00000000,
1029         (0x0e00 << 16) | (0x3c214 >> 2),
1030         0x00000000,
1031         (0x0e00 << 16) | (0x3c218 >> 2),
1032         0x00000000,
1033         (0x0e00 << 16) | (0x8904 >> 2),
1034         0x00000000,
1035         0x5,
1036         (0x0e00 << 16) | (0x8c28 >> 2),
1037         (0x0e00 << 16) | (0x8c2c >> 2),
1038         (0x0e00 << 16) | (0x8c30 >> 2),
1039         (0x0e00 << 16) | (0x8c34 >> 2),
1040         (0x0e00 << 16) | (0x9600 >> 2),
1041 };
1042
/*
 * Bonaire SPM golden settings.  Triplets of { reg offset, mask, value }
 * (field meaning presumed from the 3-column layout — see
 * radeon_program_register_sequence()); applied by cik_init_golden_registers().
 */
static const u32 bonaire_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
1047
/*
 * Bonaire common golden settings ({ reg offset, mask, value } triplets,
 * presumed — consumed by radeon_program_register_sequence() from
 * cik_init_golden_registers()).
 */
static const u32 bonaire_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};
1055
/*
 * Bonaire per-ASIC golden register fixups ({ reg offset, mask, value }
 * triplets, presumed from the 3-column layout).  Programmed once at init
 * by cik_init_golden_registers() via radeon_program_register_sequence().
 */
static const u32 bonaire_golden_registers[] =
{
        0x3354, 0x00000333, 0x00000333,
        0x3350, 0x000c0fc0, 0x00040200,
        0x9a10, 0x00010000, 0x00058208,
        0x3c000, 0xffff1fff, 0x00140000,
        0x3c200, 0xfdfc0fff, 0x00000100,
        0x3c234, 0x40000000, 0x40000200,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x350c, 0x00810000, 0x408af000,
        0x7030, 0x31000111, 0x00000011,
        0x2f48, 0x73773777, 0x12010001,
        0x220c, 0x00007fb6, 0x0021a1b1,
        0x2210, 0x00007fb6, 0x002021b1,
        0x2180, 0x00007fb6, 0x00002191,
        0x2218, 0x00007fb6, 0x002121b1,
        0x221c, 0x00007fb6, 0x002021b1,
        0x21dc, 0x00007fb6, 0x00002191,
        0x21e0, 0x00007fb6, 0x00002191,
        0x3628, 0x0000003f, 0x0000000a,
        0x362c, 0x0000003f, 0x0000000a,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000003f, 0x00000007,
        0x8bf0, 0x00002001, 0x00000001,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0x3e78, 0x00000001, 0x00000002,
        0x9100, 0x03000000, 0x0362c688,
        0x8c00, 0x000000ff, 0x00000001,
        0xe40, 0x00001fff, 0x00001fff,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x000000f3,
        0xac0c, 0xffffffff, 0x00001032
};
1100
/*
 * Bonaire MGCG/CGCG (clock-gating) init sequence.  Triplets of
 * { reg offset, mask, value } (presumed — see
 * radeon_program_register_sequence()); applied first by
 * cik_init_golden_registers() before the golden register tables.
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0xc0000100,
        0x3c2c8, 0xffffffff, 0xc0000100,
        0x3c2c4, 0xffffffff, 0xc0000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        /* repeating 5-entry pattern over consecutive 0x3c0xx offsets */
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};
1186
/*
 * Spectre (Kaveri) SPM golden settings ({ reg offset, mask, value }
 * triplets, presumed); applied by cik_init_golden_registers().
 */
static const u32 spectre_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
1191
/*
 * Spectre (Kaveri) common golden settings ({ reg offset, mask, value }
 * triplets, presumed); applied by cik_init_golden_registers().
 */
static const u32 spectre_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};
1199
/*
 * Spectre (Kaveri) per-ASIC golden register fixups ({ reg offset, mask,
 * value } triplets, presumed); programmed by cik_init_golden_registers()
 * via radeon_program_register_sequence().
 */
static const u32 spectre_golden_registers[] =
{
        0x3c000, 0xffff1fff, 0x96940200,
        0x3c00c, 0xffff0001, 0xff000000,
        0x3c200, 0xfffc0fff, 0x00000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0xfffffffc, 0x00020200,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x2f48, 0x73773777, 0x12010001,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x28350, 0x3f3f3fff, 0x00000082,
        0x28354, 0x0000003f, 0x00000000,
        0x3e78, 0x00000001, 0x00000002,
        0x913c, 0xffff03df, 0x00000004,
        0xc768, 0x00000008, 0x00000008,
        0x8c00, 0x000008ff, 0x00000800,
        0x9508, 0x00010000, 0x00010000,
        0xac0c, 0xffffffff, 0x54763210,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x30934, 0xffffffff, 0x00000001
};
1228
/*
 * Spectre (Kaveri) MGCG/CGCG (clock-gating) init sequence.  Triplets of
 * { reg offset, mask, value } (presumed); applied first by
 * cik_init_golden_registers() before the golden register tables.
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        /* repeating 5-entry pattern over consecutive 0x3c0xx offsets */
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c0ac, 0xffffffff, 0x00010000,
        0x3c0b0, 0xffffffff, 0x00030002,
        0x3c0b4, 0xffffffff, 0x00040007,
        0x3c0b8, 0xffffffff, 0x00060005,
        0x3c0bc, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};
1319
/*
 * Kalindi (Kabini) SPM golden settings ({ reg offset, mask, value }
 * triplets, presumed); applied by cik_init_golden_registers() for both
 * CHIP_KABINI and CHIP_MULLINS.
 */
static const u32 kalindi_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
1324
/*
 * Kalindi (Kabini) common golden settings ({ reg offset, mask, value }
 * triplets, presumed); shared by CHIP_KABINI and CHIP_MULLINS in
 * cik_init_golden_registers().
 */
static const u32 kalindi_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};
1332
/*
 * Kalindi (Kabini) per-ASIC golden register fixups ({ reg offset, mask,
 * value } triplets, presumed); programmed by cik_init_golden_registers()
 * via radeon_program_register_sequence().
 */
static const u32 kalindi_golden_registers[] =
{
        0x3c000, 0xffffdfff, 0x6e944040,
        0x55e4, 0xff607fff, 0xfc000100,
        0x3c220, 0xff000fff, 0x00000100,
        0x3c224, 0xff000fff, 0x00000100,
        0x3c200, 0xfffc0fff, 0x00000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x98fc, 0xffffffff, 0x00000010,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x8030, 0x00001f0f, 0x0000100a,
        0x2f48, 0x73773777, 0x12010001,
        0x2408, 0x000fffff, 0x000c007f,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0x3fff3fff, 0x00ffcfff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0x3e78, 0x00000001, 0x00000002,
        0xc768, 0x00000008, 0x00000008,
        0x8c00, 0x000000ff, 0x00000003,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x88c4, 0x001f3ae3, 0x00000082,
        0x88d4, 0x0000001f, 0x00000010,
        0x30934, 0xffffffff, 0x00000000
};
1366
/*
 * Kalindi (Kabini) MGCG/CGCG (clock-gating) init sequence.  Triplets of
 * { reg offset, mask, value } (presumed); applied first by
 * cik_init_golden_registers() for both CHIP_KABINI and CHIP_MULLINS.
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        /* repeating 5-entry pattern over consecutive 0x3c0xx offsets */
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};
1425
/*
 * Hawaii SPM golden settings ({ reg offset, mask, value } triplets,
 * presumed); applied by cik_init_golden_registers().
 */
static const u32 hawaii_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
1430
/*
 * Hawaii common golden settings ({ reg offset, mask, value } triplets,
 * presumed); applied by cik_init_golden_registers().
 */
static const u32 hawaii_golden_common_registers[] =
{
        0x30800, 0xffffffff, 0xe0000000,
        0x28350, 0xffffffff, 0x3a00161a,
        0x28354, 0xffffffff, 0x0000002e,
        0x9a10, 0xffffffff, 0x00018208,
        0x98f8, 0xffffffff, 0x12011003
};
1439
/*
 * Hawaii per-ASIC golden register fixups ({ reg offset, mask, value }
 * triplets, presumed); programmed by cik_init_golden_registers() via
 * radeon_program_register_sequence().
 */
static const u32 hawaii_golden_registers[] =
{
        0x3354, 0x00000333, 0x00000333,
        0x9a10, 0x00010000, 0x00058208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x350c, 0x00810000, 0x408af000,
        0x7030, 0x31000111, 0x00000011,
        0x2f48, 0x73773777, 0x12010001,
        0x2120, 0x0000007f, 0x0000001b,
        0x21dc, 0x00007fb6, 0x00002191,
        0x3628, 0x0000003f, 0x0000000a,
        0x362c, 0x0000003f, 0x0000000a,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8bf0, 0x00002001, 0x00000001,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x3e78, 0x00000001, 0x00000002,
        0xc768, 0x00000008, 0x00000008,
        0xc770, 0x00000f00, 0x00000800,
        0xc774, 0x00000f00, 0x00000800,
        0xc798, 0x00ffffff, 0x00ff7fbf,
        0xc79c, 0x00ffffff, 0x00ff7faf,
        0x8c00, 0x000000ff, 0x00000800,
        0xe40, 0x00001fff, 0x00001fff,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xae00, 0x00100000, 0x000ff07c,
        0xac14, 0x000003ff, 0x0000000f,
        0xac10, 0xffffffff, 0x7564fdec,
        0xac0c, 0xffffffff, 0x3120b9a8,
        0xac08, 0x20000000, 0x0f9c0000
};
1479
/*
 * Hawaii MGCG/CGCG (clock-gating) init sequence.  Triplets of
 * { reg offset, mask, value } (presumed); applied first by
 * cik_init_golden_registers() before the golden register tables.
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffd,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00200100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        /* repeating 5-entry pattern over consecutive 0x3c0xx offsets */
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c0ac, 0xffffffff, 0x00010000,
        0x3c0b0, 0xffffffff, 0x00030002,
        0x3c0b4, 0xffffffff, 0x00040007,
        0x3c0b8, 0xffffffff, 0x00060005,
        0x3c0bc, 0xffffffff, 0x00090008,
        0x3c0c0, 0xffffffff, 0x00010000,
        0x3c0c4, 0xffffffff, 0x00030002,
        0x3c0c8, 0xffffffff, 0x00040007,
        0x3c0cc, 0xffffffff, 0x00060005,
        0x3c0d0, 0xffffffff, 0x00090008,
        0x3c0d4, 0xffffffff, 0x00010000,
        0x3c0d8, 0xffffffff, 0x00030002,
        0x3c0dc, 0xffffffff, 0x00040007,
        0x3c0e0, 0xffffffff, 0x00060005,
        0x3c0e4, 0xffffffff, 0x00090008,
        0x3c0e8, 0xffffffff, 0x00010000,
        0x3c0ec, 0xffffffff, 0x00030002,
        0x3c0f0, 0xffffffff, 0x00040007,
        0x3c0f4, 0xffffffff, 0x00060005,
        0x3c0f8, 0xffffffff, 0x00090008,
        0xc318, 0xffffffff, 0x00020200,
        0x3350, 0xffffffff, 0x00000200,
        0x15c0, 0xffffffff, 0x00000400,
        0x55e8, 0xffffffff, 0x00000000,
        0x2f50, 0xffffffff, 0x00000902,
        0x3c000, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xc060000c,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};
1590
/*
 * Godavari (Mullins) per-ASIC golden register fixups ({ reg offset, mask,
 * value } triplets, presumed); programmed by cik_init_golden_registers()
 * via radeon_program_register_sequence().
 */
static const u32 godavari_golden_registers[] =
{
        0x55e4, 0xff607fff, 0xfc000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9830, 0xffffffff, 0x00000000,
        /*
         * NOTE(review): 0x98302 is not dword-aligned and the sibling
         * kalindi table uses 0x9834 with this exact mask/value — this
         * looks like a typo for 0x9834; confirm against AMD register docs
         * before changing.
         */
        0x98302, 0xf00fffff, 0x00000400,
        0x6130, 0xffffffff, 0x00010000,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x98fc, 0xffffffff, 0x00000010,
        0x8030, 0x00001f0f, 0x0000100a,
        0x2f48, 0x73773777, 0x12010001,
        0x2408, 0x000fffff, 0x000c007f,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ff0fff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0xd014, 0x00010000, 0x00810001,
        0xd814, 0x00010000, 0x00810001,
        0x3e78, 0x00000001, 0x00000002,
        0xc768, 0x00000008, 0x00000008,
        0xc770, 0x00000f00, 0x00000800,
        0xc774, 0x00000f00, 0x00000800,
        0xc798, 0x00ffffff, 0x00ff7fbf,
        0xc79c, 0x00ffffff, 0x00ff7faf,
        0x8c00, 0x000000ff, 0x00000001,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x88c4, 0x001f3ae3, 0x00000082,
        0x88d4, 0x0000001f, 0x00000010,
        0x30934, 0xffffffff, 0x00000000
};
1626
1627
1628 static void cik_init_golden_registers(struct radeon_device *rdev)
1629 {
1630         switch (rdev->family) {
1631         case CHIP_BONAIRE:
1632                 radeon_program_register_sequence(rdev,
1633                                                  bonaire_mgcg_cgcg_init,
1634                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1635                 radeon_program_register_sequence(rdev,
1636                                                  bonaire_golden_registers,
1637                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1638                 radeon_program_register_sequence(rdev,
1639                                                  bonaire_golden_common_registers,
1640                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1641                 radeon_program_register_sequence(rdev,
1642                                                  bonaire_golden_spm_registers,
1643                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1644                 break;
1645         case CHIP_KABINI:
1646                 radeon_program_register_sequence(rdev,
1647                                                  kalindi_mgcg_cgcg_init,
1648                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1649                 radeon_program_register_sequence(rdev,
1650                                                  kalindi_golden_registers,
1651                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1652                 radeon_program_register_sequence(rdev,
1653                                                  kalindi_golden_common_registers,
1654                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1655                 radeon_program_register_sequence(rdev,
1656                                                  kalindi_golden_spm_registers,
1657                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1658                 break;
1659         case CHIP_MULLINS:
1660                 radeon_program_register_sequence(rdev,
1661                                                  kalindi_mgcg_cgcg_init,
1662                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1663                 radeon_program_register_sequence(rdev,
1664                                                  godavari_golden_registers,
1665                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1666                 radeon_program_register_sequence(rdev,
1667                                                  kalindi_golden_common_registers,
1668                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1669                 radeon_program_register_sequence(rdev,
1670                                                  kalindi_golden_spm_registers,
1671                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1672                 break;
1673         case CHIP_KAVERI:
1674                 radeon_program_register_sequence(rdev,
1675                                                  spectre_mgcg_cgcg_init,
1676                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1677                 radeon_program_register_sequence(rdev,
1678                                                  spectre_golden_registers,
1679                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1680                 radeon_program_register_sequence(rdev,
1681                                                  spectre_golden_common_registers,
1682                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1683                 radeon_program_register_sequence(rdev,
1684                                                  spectre_golden_spm_registers,
1685                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1686                 break;
1687         case CHIP_HAWAII:
1688                 radeon_program_register_sequence(rdev,
1689                                                  hawaii_mgcg_cgcg_init,
1690                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1691                 radeon_program_register_sequence(rdev,
1692                                                  hawaii_golden_registers,
1693                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1694                 radeon_program_register_sequence(rdev,
1695                                                  hawaii_golden_common_registers,
1696                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1697                 radeon_program_register_sequence(rdev,
1698                                                  hawaii_golden_spm_registers,
1699                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1700                 break;
1701         default:
1702                 break;
1703         }
1704 }
1705
1706 /**
1707  * cik_get_xclk - get the xclk
1708  *
1709  * @rdev: radeon_device pointer
1710  *
1711  * Returns the reference clock used by the gfx engine
1712  * (CIK).
1713  */
1714 u32 cik_get_xclk(struct radeon_device *rdev)
1715 {
1716         u32 reference_clock = rdev->clock.spll.reference_freq;
1717
1718         if (rdev->flags & RADEON_IS_IGP) {
1719                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1720                         return reference_clock / 2;
1721         } else {
1722                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1723                         return reference_clock / 4;
1724         }
1725         return reference_clock;
1726 }
1727
1728 /**
1729  * cik_mm_rdoorbell - read a doorbell dword
1730  *
1731  * @rdev: radeon_device pointer
1732  * @index: doorbell index
1733  *
1734  * Returns the value in the doorbell aperture at the
1735  * requested doorbell index (CIK).
1736  */
1737 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1738 {
1739         if (index < rdev->doorbell.num_doorbells) {
1740                 return readl(rdev->doorbell.ptr + index);
1741         } else {
1742                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1743                 return 0;
1744         }
1745 }
1746
1747 /**
1748  * cik_mm_wdoorbell - write a doorbell dword
1749  *
1750  * @rdev: radeon_device pointer
1751  * @index: doorbell index
1752  * @v: value to write
1753  *
1754  * Writes @v to the doorbell aperture at the
1755  * requested doorbell index (CIK).
1756  */
1757 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1758 {
1759         if (index < rdev->doorbell.num_doorbells) {
1760                 writel(v, rdev->doorbell.ptr + index);
1761         } else {
1762                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1763         }
1764 }
1765
#define BONAIRE_IO_MC_REGS_SIZE 36

/*
 * Bonaire MC IO debug register table: {MC_SEQ_IO_DEBUG index, value} pairs
 * written via MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA by
 * ci_mc_load_microcode() before the MC ucode is uploaded (legacy, non-header
 * firmware only; new-style firmware carries its own pair list).
 * NOTE(review): values presumably hardware-validated settings — do not edit.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1807
#define HAWAII_IO_MC_REGS_SIZE 22

/*
 * Hawaii MC IO debug register table: {MC_SEQ_IO_DEBUG index, value} pairs,
 * consumed by ci_mc_load_microcode() exactly like bonaire_io_mc_regs above
 * (legacy firmware path only).
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1835
1836
1837 /**
1838  * cik_srbm_select - select specific register instances
1839  *
1840  * @rdev: radeon_device pointer
1841  * @me: selected ME (micro engine)
1842  * @pipe: pipe
1843  * @queue: queue
1844  * @vmid: VMID
1845  *
1846  * Switches the currently active registers instances.  Some
1847  * registers are instanced per VMID, others are instanced per
1848  * me/pipe/queue combination.
1849  */
1850 static void cik_srbm_select(struct radeon_device *rdev,
1851                             u32 me, u32 pipe, u32 queue, u32 vmid)
1852 {
1853         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1854                              MEID(me & 0x3) |
1855                              VMID(vmid & 0xf) |
1856                              QUEUEID(queue & 0x7));
1857         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1858 }
1859
/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).  If the MC sequencer is
 * already running, the upload is skipped and 0 is returned.  The IO
 * debug register pairs and the ucode come either from the new-style
 * firmware header (rdev->new_fw) or from the built-in tables above.
 * Returns 0 on success, error on failure.
 */
int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;
	const __le32 *new_fw_data = NULL;
	u32 running, tmp;
	u32 *io_mc_regs = NULL;
	const __le32 *new_io_mc_regs = NULL;
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		/* new-style firmware: locate the io-debug pair array and the
		 * ucode via offsets carried in the firmware header */
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);

		/* io debug entries are {index, value} pairs of 32-bit words,
		 * hence bytes / (4 * 2) */
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		/* legacy firmware: whole blob is ucode, io-debug pairs come
		 * from the per-asic tables above */
		ucode_size = rdev->mc_fw->size / 4;

		switch (rdev->family) {
		case CHIP_BONAIRE:
			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
			regs_size = BONAIRE_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAWAII:
			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
			regs_size = HAWAII_IO_MC_REGS_SIZE;
			break;
		default:
			return -EINVAL;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	/* only upload when the MC sequencer is not already running */
	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}

		tmp = RREG32(MC_SEQ_MISC0);
		/* NOTE(review): extra io-debug writes for PCI device 0x6649
		 * when MC_SEQ_MISC0 matches 0x56xx — presumably a
		 * board/memory-specific workaround; confirm with hw docs */
		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
		}

		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}
1965
1966 /**
1967  * cik_init_microcode - load ucode images from disk
1968  *
1969  * @rdev: radeon_device pointer
1970  *
1971  * Use the firmware interface to load the ucode images into
1972  * the driver (not loaded into hw).
1973  * Returns 0 on success, error on failure.
1974  */
1975 static int cik_init_microcode(struct radeon_device *rdev)
1976 {
1977         const char *chip_name;
1978         const char *new_chip_name;
1979         size_t pfp_req_size, me_req_size, ce_req_size,
1980                 mec_req_size, rlc_req_size, mc_req_size = 0,
1981                 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1982         char fw_name[30];
1983         int new_fw = 0;
1984         int err;
1985         int num_fw;
1986         bool new_smc = false;
1987
1988         DRM_DEBUG("\n");
1989
1990         switch (rdev->family) {
1991         case CHIP_BONAIRE:
1992                 chip_name = "BONAIRE";
1993                 if ((rdev->pdev->revision == 0x80) ||
1994                     (rdev->pdev->revision == 0x81) ||
1995                     (rdev->pdev->device == 0x665f))
1996                         new_smc = true;
1997                 new_chip_name = "bonaire";
1998                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1999                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2000                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2001                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2002                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2003                 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2004                 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2005                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2006                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2007                 num_fw = 8;
2008                 break;
2009         case CHIP_HAWAII:
2010                 chip_name = "HAWAII";
2011                 if (rdev->pdev->revision == 0x80)
2012                         new_smc = true;
2013                 new_chip_name = "hawaii";
2014                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2015                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2016                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2017                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2018                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2019                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2020                 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2021                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2022                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2023                 num_fw = 8;
2024                 break;
2025         case CHIP_KAVERI:
2026                 chip_name = "KAVERI";
2027                 new_chip_name = "kaveri";
2028                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2029                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2030                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2031                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2032                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2033                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2034                 num_fw = 7;
2035                 break;
2036         case CHIP_KABINI:
2037                 chip_name = "KABINI";
2038                 new_chip_name = "kabini";
2039                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2040                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2041                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2042                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2043                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2044                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2045                 num_fw = 6;
2046                 break;
2047         case CHIP_MULLINS:
2048                 chip_name = "MULLINS";
2049                 new_chip_name = "mullins";
2050                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2051                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2052                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2053                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2054                 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2055                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2056                 num_fw = 6;
2057                 break;
2058         default: BUG();
2059         }
2060
2061         DRM_INFO("Loading %s Microcode\n", new_chip_name);
2062
2063         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2064         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2065         if (err) {
2066                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2067                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2068                 if (err)
2069                         goto out;
2070                 if (rdev->pfp_fw->size != pfp_req_size) {
2071                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2072                                rdev->pfp_fw->size, fw_name);
2073                         err = -EINVAL;
2074                         goto out;
2075                 }
2076         } else {
2077                 err = radeon_ucode_validate(rdev->pfp_fw);
2078                 if (err) {
2079                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2080                                fw_name);
2081                         goto out;
2082                 } else {
2083                         new_fw++;
2084                 }
2085         }
2086
2087         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2088         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2089         if (err) {
2090                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2091                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2092                 if (err)
2093                         goto out;
2094                 if (rdev->me_fw->size != me_req_size) {
2095                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2096                                rdev->me_fw->size, fw_name);
2097                         err = -EINVAL;
2098                 }
2099         } else {
2100                 err = radeon_ucode_validate(rdev->me_fw);
2101                 if (err) {
2102                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2103                                fw_name);
2104                         goto out;
2105                 } else {
2106                         new_fw++;
2107                 }
2108         }
2109
2110         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2111         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2112         if (err) {
2113                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2114                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2115                 if (err)
2116                         goto out;
2117                 if (rdev->ce_fw->size != ce_req_size) {
2118                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2119                                rdev->ce_fw->size, fw_name);
2120                         err = -EINVAL;
2121                 }
2122         } else {
2123                 err = radeon_ucode_validate(rdev->ce_fw);
2124                 if (err) {
2125                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2126                                fw_name);
2127                         goto out;
2128                 } else {
2129                         new_fw++;
2130                 }
2131         }
2132
2133         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2134         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2135         if (err) {
2136                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2137                 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2138                 if (err)
2139                         goto out;
2140                 if (rdev->mec_fw->size != mec_req_size) {
2141                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2142                                rdev->mec_fw->size, fw_name);
2143                         err = -EINVAL;
2144                 }
2145         } else {
2146                 err = radeon_ucode_validate(rdev->mec_fw);
2147                 if (err) {
2148                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2149                                fw_name);
2150                         goto out;
2151                 } else {
2152                         new_fw++;
2153                 }
2154         }
2155
2156         if (rdev->family == CHIP_KAVERI) {
2157                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2158                 err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2159                 if (err) {
2160                         goto out;
2161                 } else {
2162                         err = radeon_ucode_validate(rdev->mec2_fw);
2163                         if (err) {
2164                                 goto out;
2165                         } else {
2166                                 new_fw++;
2167                         }
2168                 }
2169         }
2170
2171         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2172         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2173         if (err) {
2174                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2175                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2176                 if (err)
2177                         goto out;
2178                 if (rdev->rlc_fw->size != rlc_req_size) {
2179                         pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2180                                rdev->rlc_fw->size, fw_name);
2181                         err = -EINVAL;
2182                 }
2183         } else {
2184                 err = radeon_ucode_validate(rdev->rlc_fw);
2185                 if (err) {
2186                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2187                                fw_name);
2188                         goto out;
2189                 } else {
2190                         new_fw++;
2191                 }
2192         }
2193
2194         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2195         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2196         if (err) {
2197                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2198                 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2199                 if (err)
2200                         goto out;
2201                 if (rdev->sdma_fw->size != sdma_req_size) {
2202                         pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2203                                rdev->sdma_fw->size, fw_name);
2204                         err = -EINVAL;
2205                 }
2206         } else {
2207                 err = radeon_ucode_validate(rdev->sdma_fw);
2208                 if (err) {
2209                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2210                                fw_name);
2211                         goto out;
2212                 } else {
2213                         new_fw++;
2214                 }
2215         }
2216
2217         /* No SMC, MC ucode on APUs */
2218         if (!(rdev->flags & RADEON_IS_IGP)) {
2219                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2220                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2221                 if (err) {
2222                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2223                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2224                         if (err) {
2225                                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2226                                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2227                                 if (err)
2228                                         goto out;
2229                         }
2230                         if ((rdev->mc_fw->size != mc_req_size) &&
2231                             (rdev->mc_fw->size != mc2_req_size)){
2232                                 pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2233                                        rdev->mc_fw->size, fw_name);
2234                                 err = -EINVAL;
2235                         }
2236                         DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2237                 } else {
2238                         err = radeon_ucode_validate(rdev->mc_fw);
2239                         if (err) {
2240                                 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2241                                        fw_name);
2242                                 goto out;
2243                         } else {
2244                                 new_fw++;
2245                         }
2246                 }
2247
2248                 if (new_smc)
2249                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2250                 else
2251                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2252                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2253                 if (err) {
2254                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2255                         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2256                         if (err) {
2257                                 pr_err("smc: error loading firmware \"%s\"\n",
2258                                        fw_name);
2259                                 release_firmware(rdev->smc_fw);
2260                                 rdev->smc_fw = NULL;
2261                                 err = 0;
2262                         } else if (rdev->smc_fw->size != smc_req_size) {
2263                                 pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2264                                        rdev->smc_fw->size, fw_name);
2265                                 err = -EINVAL;
2266                         }
2267                 } else {
2268                         err = radeon_ucode_validate(rdev->smc_fw);
2269                         if (err) {
2270                                 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2271                                        fw_name);
2272                                 goto out;
2273                         } else {
2274                                 new_fw++;
2275                         }
2276                 }
2277         }
2278
2279         if (new_fw == 0) {
2280                 rdev->new_fw = false;
2281         } else if (new_fw < num_fw) {
2282                 pr_err("ci_fw: mixing new and old firmware!\n");
2283                 err = -EINVAL;
2284         } else {
2285                 rdev->new_fw = true;
2286         }
2287
2288 out:
2289         if (err) {
2290                 if (err != -EINVAL)
2291                         pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2292                                fw_name);
2293                 release_firmware(rdev->pfp_fw);
2294                 rdev->pfp_fw = NULL;
2295                 release_firmware(rdev->me_fw);
2296                 rdev->me_fw = NULL;
2297                 release_firmware(rdev->ce_fw);
2298                 rdev->ce_fw = NULL;
2299                 release_firmware(rdev->mec_fw);
2300                 rdev->mec_fw = NULL;
2301                 release_firmware(rdev->mec2_fw);
2302                 rdev->mec2_fw = NULL;
2303                 release_firmware(rdev->rlc_fw);
2304                 rdev->rlc_fw = NULL;
2305                 release_firmware(rdev->sdma_fw);
2306                 rdev->sdma_fw = NULL;
2307                 release_firmware(rdev->mc_fw);
2308                 rdev->mc_fw = NULL;
2309                 release_firmware(rdev->smc_fw);
2310                 rdev->smc_fw = NULL;
2311         }
2312         return err;
2313 }
2314
2315 /*
2316  * Core functions
2317  */
2318 /**
2319  * cik_tiling_mode_table_init - init the hw tiling table
2320  *
2321  * @rdev: radeon_device pointer
2322  *
2323  * Starting with SI, the tiling setup is done globally in a
2324  * set of 32 tiling modes.  Rather than selecting each set of
2325  * parameters per surface as on older asics, we just select
2326  * which index in the tiling table we want to use, and the
2327  * surface uses those parameters (CIK).
2328  */
2329 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2330 {
2331         u32 *tile = rdev->config.cik.tile_mode_array;
2332         u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2333         const u32 num_tile_mode_states =
2334                         ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2335         const u32 num_secondary_tile_mode_states =
2336                         ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2337         u32 reg_offset, split_equal_to_row_size;
2338         u32 num_pipe_configs;
2339         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2340                 rdev->config.cik.max_shader_engines;
2341
2342         switch (rdev->config.cik.mem_row_size_in_kb) {
2343         case 1:
2344                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2345                 break;
2346         case 2:
2347         default:
2348                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2349                 break;
2350         case 4:
2351                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2352                 break;
2353         }
2354
2355         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2356         if (num_pipe_configs > 8)
2357                 num_pipe_configs = 16;
2358
2359         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2360                 tile[reg_offset] = 0;
2361         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2362                 macrotile[reg_offset] = 0;
2363
2364         switch(num_pipe_configs) {
2365         case 16:
2366                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2367                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2368                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2369                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2370                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2371                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2372                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2374                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2376                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2378                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2380                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2382                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2383                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2384                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385                            TILE_SPLIT(split_equal_to_row_size));
2386                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2387                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2389                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2390                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2391                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2393                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2394                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2395                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396                            TILE_SPLIT(split_equal_to_row_size));
2397                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2398                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2399                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2400                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2402                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2403                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2404                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2406                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2407                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2408                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2409                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2411                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2412                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2413                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2414                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2415                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2416                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2417                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2418                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2419                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2422                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2423                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2424                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2426                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2427                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2428                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2430                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2432                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2434                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2436                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2438                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2439                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2441                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2442                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2443                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444
2445                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2447                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2448                            NUM_BANKS(ADDR_SURF_16_BANK));
2449                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2451                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452                            NUM_BANKS(ADDR_SURF_16_BANK));
2453                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2455                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2456                            NUM_BANKS(ADDR_SURF_16_BANK));
2457                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2460                            NUM_BANKS(ADDR_SURF_16_BANK));
2461                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2464                            NUM_BANKS(ADDR_SURF_8_BANK));
2465                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2467                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468                            NUM_BANKS(ADDR_SURF_4_BANK));
2469                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2472                            NUM_BANKS(ADDR_SURF_2_BANK));
2473                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2475                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2476                            NUM_BANKS(ADDR_SURF_16_BANK));
2477                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2479                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480                            NUM_BANKS(ADDR_SURF_16_BANK));
2481                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2483                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2484                             NUM_BANKS(ADDR_SURF_16_BANK));
2485                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2487                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2488                             NUM_BANKS(ADDR_SURF_8_BANK));
2489                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2492                             NUM_BANKS(ADDR_SURF_4_BANK));
2493                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2495                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2496                             NUM_BANKS(ADDR_SURF_2_BANK));
2497                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2500                             NUM_BANKS(ADDR_SURF_2_BANK));
2501
2502                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2503                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2504                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2505                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2506                 break;
2507
2508         case 8:
2509                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2510                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2511                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2512                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2513                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2515                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2517                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2518                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2519                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2520                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2521                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2522                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2523                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2524                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2525                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2527                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2528                            TILE_SPLIT(split_equal_to_row_size));
2529                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2530                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2532                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2533                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2534                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2536                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2537                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539                            TILE_SPLIT(split_equal_to_row_size));
2540                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2541                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2542                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2543                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2544                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2545                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2546                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2547                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2549                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2550                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2551                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2552                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2553                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2554                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2555                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2556                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2557                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2558                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2560                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2561                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2562                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2564                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2565                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2566                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2567                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2569                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2570                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2573                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2574                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2575                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2576                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2577                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2579                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2580                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2581                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2582                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2584                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2585                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2586                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2587
2588                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2589                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2590                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2591                                 NUM_BANKS(ADDR_SURF_16_BANK));
2592                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2593                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2594                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2595                                 NUM_BANKS(ADDR_SURF_16_BANK));
2596                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2598                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2599                                 NUM_BANKS(ADDR_SURF_16_BANK));
2600                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2602                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2603                                 NUM_BANKS(ADDR_SURF_16_BANK));
2604                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2605                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2606                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2607                                 NUM_BANKS(ADDR_SURF_8_BANK));
2608                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2610                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2611                                 NUM_BANKS(ADDR_SURF_4_BANK));
2612                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2613                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2614                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2615                                 NUM_BANKS(ADDR_SURF_2_BANK));
2616                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2618                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2619                                 NUM_BANKS(ADDR_SURF_16_BANK));
2620                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2622                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2623                                 NUM_BANKS(ADDR_SURF_16_BANK));
2624                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2626                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2627                                 NUM_BANKS(ADDR_SURF_16_BANK));
2628                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2630                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2631                                 NUM_BANKS(ADDR_SURF_16_BANK));
2632                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2633                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2634                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2635                                 NUM_BANKS(ADDR_SURF_8_BANK));
2636                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2638                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2639                                 NUM_BANKS(ADDR_SURF_4_BANK));
2640                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2642                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2643                                 NUM_BANKS(ADDR_SURF_2_BANK));
2644
2645                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2646                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2647                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2648                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2649                 break;
2650
2651         case 4:
2652                 if (num_rbs == 4) {
2653                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2655                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2656                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2657                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2658                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2659                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2660                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2661                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2663                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2664                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2665                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2667                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2668                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2669                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2671                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2672                            TILE_SPLIT(split_equal_to_row_size));
2673                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2674                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2676                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2677                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2678                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2680                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2681                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2682                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683                            TILE_SPLIT(split_equal_to_row_size));
2684                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2685                            PIPE_CONFIG(ADDR_SURF_P4_16x16));
2686                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2687                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2688                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2689                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2690                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2691                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2693                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2694                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2695                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2696                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2697                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2698                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2699                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2700                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2701                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2702                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2704                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2705                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2706                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2708                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2709                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2710                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2711                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2713                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2714                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2716                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2717                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2718                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2719                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2720                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2721                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2723                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2724                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2725                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2726                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2728                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2729                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2730                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731
2732                 } else if (num_rbs < 4) {
2733                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2734                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2735                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2736                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2737                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2738                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2739                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2740                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2741                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2742                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2743                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2744                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2745                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2746                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2747                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2748                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2749                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2750                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2751                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2752                            TILE_SPLIT(split_equal_to_row_size));
2753                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2754                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2755                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2756                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2757                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2758                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2760                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2761                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763                            TILE_SPLIT(split_equal_to_row_size));
2764                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2765                            PIPE_CONFIG(ADDR_SURF_P4_8x16));
2766                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2767                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2768                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2769                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2770                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2771                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2772                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2773                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2774                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2775                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2776                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2777                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2778                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2779                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2780                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2781                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2782                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2783                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2784                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2786                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2788                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2789                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2790                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2792                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2793                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2794                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2796                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2797                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2798                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2799                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2800                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2801                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2803                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2804                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2805                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2808                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2809                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811                 }
2812
2813                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2814                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2815                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2816                                 NUM_BANKS(ADDR_SURF_16_BANK));
2817                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2819                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2820                                 NUM_BANKS(ADDR_SURF_16_BANK));
2821                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2823                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2824                                 NUM_BANKS(ADDR_SURF_16_BANK));
2825                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2826                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2827                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2828                                 NUM_BANKS(ADDR_SURF_16_BANK));
2829                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2831                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2832                                 NUM_BANKS(ADDR_SURF_16_BANK));
2833                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2835                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2836                                 NUM_BANKS(ADDR_SURF_8_BANK));
2837                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2839                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2840                                 NUM_BANKS(ADDR_SURF_4_BANK));
2841                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2842                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2843                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2844                                 NUM_BANKS(ADDR_SURF_16_BANK));
2845                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2846                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2847                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2848                                 NUM_BANKS(ADDR_SURF_16_BANK));
2849                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2850                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2851                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2852                                 NUM_BANKS(ADDR_SURF_16_BANK));
2853                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2855                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2856                                 NUM_BANKS(ADDR_SURF_16_BANK));
2857                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2859                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2860                                 NUM_BANKS(ADDR_SURF_16_BANK));
2861                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2863                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2864                                 NUM_BANKS(ADDR_SURF_8_BANK));
2865                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2866                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2867                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2868                                 NUM_BANKS(ADDR_SURF_4_BANK));
2869
2870                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2871                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2872                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2873                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2874                 break;
2875
2876         case 2:
2877                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2878                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2879                            PIPE_CONFIG(ADDR_SURF_P2) |
2880                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2881                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2882                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2883                            PIPE_CONFIG(ADDR_SURF_P2) |
2884                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2885                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2887                            PIPE_CONFIG(ADDR_SURF_P2) |
2888                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2889                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2890                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2891                            PIPE_CONFIG(ADDR_SURF_P2) |
2892                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2893                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2894                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2895                            PIPE_CONFIG(ADDR_SURF_P2) |
2896                            TILE_SPLIT(split_equal_to_row_size));
2897                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2898                            PIPE_CONFIG(ADDR_SURF_P2) |
2899                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2900                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2901                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2902                            PIPE_CONFIG(ADDR_SURF_P2) |
2903                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2904                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2905                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906                            PIPE_CONFIG(ADDR_SURF_P2) |
2907                            TILE_SPLIT(split_equal_to_row_size));
2908                 tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2909                            PIPE_CONFIG(ADDR_SURF_P2);
2910                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2911                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2912                            PIPE_CONFIG(ADDR_SURF_P2));
2913                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2914                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2915                             PIPE_CONFIG(ADDR_SURF_P2) |
2916                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2917                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2918                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2919                             PIPE_CONFIG(ADDR_SURF_P2) |
2920                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2921                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2922                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2923                             PIPE_CONFIG(ADDR_SURF_P2) |
2924                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2926                             PIPE_CONFIG(ADDR_SURF_P2) |
2927                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2928                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2929                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2930                             PIPE_CONFIG(ADDR_SURF_P2) |
2931                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2932                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2933                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2934                             PIPE_CONFIG(ADDR_SURF_P2) |
2935                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2937                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2938                             PIPE_CONFIG(ADDR_SURF_P2) |
2939                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2941                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2942                             PIPE_CONFIG(ADDR_SURF_P2));
2943                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2944                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2945                             PIPE_CONFIG(ADDR_SURF_P2) |
2946                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2947                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2948                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2949                             PIPE_CONFIG(ADDR_SURF_P2) |
2950                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2952                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2953                             PIPE_CONFIG(ADDR_SURF_P2) |
2954                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955
2956                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2957                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2958                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2959                                 NUM_BANKS(ADDR_SURF_16_BANK));
2960                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2961                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2962                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2963                                 NUM_BANKS(ADDR_SURF_16_BANK));
2964                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2965                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2966                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2967                                 NUM_BANKS(ADDR_SURF_16_BANK));
2968                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2969                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2970                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971                                 NUM_BANKS(ADDR_SURF_16_BANK));
2972                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2973                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2974                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2975                                 NUM_BANKS(ADDR_SURF_16_BANK));
2976                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2977                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2978                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2979                                 NUM_BANKS(ADDR_SURF_16_BANK));
2980                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2983                                 NUM_BANKS(ADDR_SURF_8_BANK));
2984                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2985                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2986                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2987                                 NUM_BANKS(ADDR_SURF_16_BANK));
2988                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2989                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2990                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2991                                 NUM_BANKS(ADDR_SURF_16_BANK));
2992                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2993                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2994                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2995                                 NUM_BANKS(ADDR_SURF_16_BANK));
2996                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2997                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2998                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2999                                 NUM_BANKS(ADDR_SURF_16_BANK));
3000                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3001                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3002                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3003                                 NUM_BANKS(ADDR_SURF_16_BANK));
3004                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3006                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3007                                 NUM_BANKS(ADDR_SURF_16_BANK));
3008                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3010                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3011                                 NUM_BANKS(ADDR_SURF_8_BANK));
3012
3013                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3014                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3015                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3016                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3017                 break;
3018
3019         default:
3020                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3021         }
3022 }
3023
3024 /**
3025  * cik_select_se_sh - select which SE, SH to address
3026  *
3027  * @rdev: radeon_device pointer
3028  * @se_num: shader engine to address
3029  * @sh_num: sh block to address
3030  *
3031  * Select which SE, SH combinations to address. Certain
3032  * registers are instanced per SE or SH.  0xffffffff means
3033  * broadcast to all SEs or SHs (CIK).
3034  */
3035 static void cik_select_se_sh(struct radeon_device *rdev,
3036                              u32 se_num, u32 sh_num)
3037 {
3038         u32 data = INSTANCE_BROADCAST_WRITES;
3039
3040         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3041                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3042         else if (se_num == 0xffffffff)
3043                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3044         else if (sh_num == 0xffffffff)
3045                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3046         else
3047                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3048         WREG32(GRBM_GFX_INDEX, data);
3049 }
3050
3051 /**
3052  * cik_create_bitmask - create a bitmask
3053  *
3054  * @bit_width: length of the mask
3055  *
3056  * create a variable length bit mask (CIK).
3057  * Returns the bitmask.
3058  */
3059 static u32 cik_create_bitmask(u32 bit_width)
3060 {
3061         u32 i, mask = 0;
3062
3063         for (i = 0; i < bit_width; i++) {
3064                 mask <<= 1;
3065                 mask |= 1;
3066         }
3067         return mask;
3068 }
3069
3070 /**
3071  * cik_get_rb_disabled - computes the mask of disabled RBs
3072  *
3073  * @rdev: radeon_device pointer
3074  * @max_rb_num: max RBs (render backends) for the asic
3075  * @se_num: number of SEs (shader engines) for the asic
3076  * @sh_per_se: number of SH blocks per SE for the asic
3077  *
3078  * Calculates the bitmask of disabled RBs (CIK).
3079  * Returns the disabled RB bitmask.
3080  */
3081 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3082                               u32 max_rb_num_per_se,
3083                               u32 sh_per_se)
3084 {
3085         u32 data, mask;
3086
3087         data = RREG32(CC_RB_BACKEND_DISABLE);
3088         if (data & 1)
3089                 data &= BACKEND_DISABLE_MASK;
3090         else
3091                 data = 0;
3092         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3093
3094         data >>= BACKEND_DISABLE_SHIFT;
3095
3096         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3097
3098         return data & mask;
3099 }
3100
/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 *
 * Configures per-SE/SH RB registers (CIK).
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* walk every SE/SH instance and pack its disabled-RB bits into one
	 * bitmap; Hawaii uses a wider per-SH field than the other CIK parts.
	 */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			if (rdev->family == CHIP_HAWAII)
				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
			else
				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* return GRBM_GFX_INDEX to broadcast mode */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert: derive the enabled-RB bitmap from the disabled one */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* record the enabled-RB mask in the device config for later use */
	rdev->config.cik.backend_enable_mask = enabled_rbs;

	/* program each SE's raster config from the surviving RBs;
	 * two bits of enabled_rbs are consumed per SH block.
	 */
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 0:
				/* no RBs enabled in this pair */
				if (j == 0)
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
				else
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
				break;
			case 1:
				/* only the first RB of the pair */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				/* only the second RB of the pair */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				/* both RBs enabled */
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	/* restore broadcast addressing */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
3169
/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-family shader engine, pipe, CU and render backend limits */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAWAII:
		rdev->config.cik.max_shader_engines = 4;
		rdev->config.cik.max_tile_pipes = 16;
		rdev->config.cik.max_cu_per_sh = 11;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 4;
		rdev->config.cik.max_texture_channel_caches = 16;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 4;
		/* CU and backend counts vary by Kaveri SKU, keyed off the
		 * PCI device id.
		 */
		if ((rdev->pdev->device == 0x1304) ||
		    (rdev->pdev->device == 0x1305) ||
		    (rdev->pdev->device == 0x130C) ||
		    (rdev->pdev->device == 0x130F) ||
		    (rdev->pdev->device == 0x1310) ||
		    (rdev->pdev->device == 0x1311) ||
		    (rdev->pdev->device == 0x131C)) {
			rdev->config.cik.max_cu_per_sh = 8;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1309) ||
			   (rdev->pdev->device == 0x130A) ||
			   (rdev->pdev->device == 0x130D) ||
			   (rdev->pdev->device == 0x1313) ||
			   (rdev->pdev->device == 0x131D)) {
			rdev->config.cik.max_cu_per_sh = 6;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1306) ||
			   (rdev->pdev->device == 0x1307) ||
			   (rdev->pdev->device == 0x130B) ||
			   (rdev->pdev->device == 0x130E) ||
			   (rdev->pdev->device == 0x1315) ||
			   (rdev->pdev->device == 0x1318) ||
			   (rdev->pdev->device == 0x131B)) {
			rdev->config.cik.max_cu_per_sh = 4;
			rdev->config.cik.max_backends_per_se = 1;
		} else {
			rdev->config.cik.max_cu_per_sh = 3;
			rdev->config.cik.max_backends_per_se = 1;
		}
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
	WREG32(SRBM_INT_CNTL, 0x1);
	WREG32(SRBM_INT_ACK, 0x1);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read here but not used anywhere
	 * below in this function -- confirm whether it can be dropped.
	 */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	/* derive the memory row size (in KB) from the NOOFCOLS field,
	 * capped at 4KB.
	 */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	rdev->config.cik.tile_config |=
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* program the address config into the GFX, HDP, DMIF, SDMA and
	 * UVD blocks.
	 */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* count the active compute units across all SE/SH combinations */
	rdev->config.cik.active_cus = 0;
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			rdev->config.cik.active_cus +=
				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	/* scan converter FIFO sizes come from the per-family config above */
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* read-modify-write of HDP_HOST_PATH_CNTL with no bit changes */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

	/* brief delay to let the configuration settle */
	udelay(50);
}
3445
3446 /*
3447  * GPU scratch registers helpers function.
3448  */
3449 /**
3450  * cik_scratch_init - setup driver info for CP scratch regs
3451  *
3452  * @rdev: radeon_device pointer
3453  *
3454  * Set up the number and offset of the CP scratch registers.
3455  * NOTE: use of CP scratch registers is a legacy inferface and
3456  * is not used by default on newer asics (r6xx+).  On newer asics,
3457  * memory buffers are used for fences rather than scratch regs.
3458  */
3459 static void cik_scratch_init(struct radeon_device *rdev)
3460 {
3461         int i;
3462
3463         rdev->scratch.num_reg = 7;
3464         rdev->scratch.reg_base = SCRATCH_REG0;
3465         for (i = 0; i < rdev->scratch.num_reg; i++) {
3466                 rdev->scratch.free[i] = true;
3467                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3468         }
3469 }
3470
3471 /**
3472  * cik_ring_test - basic gfx ring test
3473  *
3474  * @rdev: radeon_device pointer
3475  * @ring: radeon_ring structure holding ring information
3476  *
3477  * Allocate a scratch register and write to it using the gfx ring (CIK).
3478  * Provides a basic gfx ring test to verify that the ring is working.
3479  * Used by cik_cp_gfx_resume();
3480  * Returns 0 on success, error on failure.
3481  */
3482 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3483 {
3484         uint32_t scratch;
3485         uint32_t tmp = 0;
3486         unsigned i;
3487         int r;
3488
3489         r = radeon_scratch_get(rdev, &scratch);
3490         if (r) {
3491                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3492                 return r;
3493         }
3494         WREG32(scratch, 0xCAFEDEAD);
3495         r = radeon_ring_lock(rdev, ring, 3);
3496         if (r) {
3497                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3498                 radeon_scratch_free(rdev, scratch);
3499                 return r;
3500         }
3501         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3502         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3503         radeon_ring_write(ring, 0xDEADBEEF);
3504         radeon_ring_unlock_commit(rdev, ring, false);
3505
3506         for (i = 0; i < rdev->usec_timeout; i++) {
3507                 tmp = RREG32(scratch);
3508                 if (tmp == 0xDEADBEEF)
3509                         break;
3510                 DRM_UDELAY(1);
3511         }
3512         if (i < rdev->usec_timeout) {
3513                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3514         } else {
3515                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3516                           ring->idx, scratch, tmp);
3517                 r = -EINVAL;
3518         }
3519         radeon_scratch_free(rdev, scratch);
3520         return r;
3521 }
3522
3523 /**
3524  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3525  *
3526  * @rdev: radeon_device pointer
3527  * @ridx: radeon ring index
3528  *
3529  * Emits an hdp flush on the cp.
3530  */
3531 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3532                                        int ridx)
3533 {
3534         struct radeon_ring *ring = &rdev->ring[ridx];
3535         u32 ref_and_mask;
3536
3537         switch (ring->idx) {
3538         case CAYMAN_RING_TYPE_CP1_INDEX:
3539         case CAYMAN_RING_TYPE_CP2_INDEX:
3540         default:
3541                 switch (ring->me) {
3542                 case 0:
3543                         ref_and_mask = CP2 << ring->pipe;
3544                         break;
3545                 case 1:
3546                         ref_and_mask = CP6 << ring->pipe;
3547                         break;
3548                 default:
3549                         return;
3550                 }
3551                 break;
3552         case RADEON_RING_TYPE_GFX_INDEX:
3553                 ref_and_mask = CP0;
3554                 break;
3555         }
3556
3557         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3558         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3559                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
3560                                  WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3561         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3562         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3563         radeon_ring_write(ring, ref_and_mask);
3564         radeon_ring_write(ring, ref_and_mask);
3565         radeon_ring_write(ring, 0x20); /* poll interval */
3566 }
3567
3568 /**
3569  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3570  *
3571  * @rdev: radeon_device pointer
3572  * @fence: radeon fence object
3573  *
3574  * Emits a fence sequnce number on the gfx ring and flushes
3575  * GPU caches.
3576  */
3577 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3578                              struct radeon_fence *fence)
3579 {
3580         struct radeon_ring *ring = &rdev->ring[fence->ring];
3581         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3582
3583         /* Workaround for cache flush problems. First send a dummy EOP
3584          * event down the pipe with seq one below.
3585          */
3586         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3587         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3588                                  EOP_TC_ACTION_EN |
3589                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3590                                  EVENT_INDEX(5)));
3591         radeon_ring_write(ring, addr & 0xfffffffc);
3592         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3593                                 DATA_SEL(1) | INT_SEL(0));
3594         radeon_ring_write(ring, fence->seq - 1);
3595         radeon_ring_write(ring, 0);
3596
3597         /* Then send the real EOP event down the pipe. */
3598         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3599         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3600                                  EOP_TC_ACTION_EN |
3601                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3602                                  EVENT_INDEX(5)));
3603         radeon_ring_write(ring, addr & 0xfffffffc);
3604         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3605         radeon_ring_write(ring, fence->seq);
3606         radeon_ring_write(ring, 0);
3607 }
3608
3609 /**
3610  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3611  *
3612  * @rdev: radeon_device pointer
3613  * @fence: radeon fence object
3614  *
3615  * Emits a fence sequnce number on the compute ring and flushes
3616  * GPU caches.
3617  */
3618 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3619                                  struct radeon_fence *fence)
3620 {
3621         struct radeon_ring *ring = &rdev->ring[fence->ring];
3622         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3623
3624         /* RELEASE_MEM - flush caches, send int */
3625         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3626         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3627                                  EOP_TC_ACTION_EN |
3628                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3629                                  EVENT_INDEX(5)));
3630         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3631         radeon_ring_write(ring, addr & 0xfffffffc);
3632         radeon_ring_write(ring, upper_32_bits(addr));
3633         radeon_ring_write(ring, fence->seq);
3634         radeon_ring_write(ring, 0);
3635 }
3636
3637 /**
3638  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3639  *
3640  * @rdev: radeon_device pointer
3641  * @ring: radeon ring buffer object
3642  * @semaphore: radeon semaphore object
3643  * @emit_wait: Is this a sempahore wait?
3644  *
3645  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3646  * from running ahead of semaphore waits.
3647  */
3648 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3649                              struct radeon_ring *ring,
3650                              struct radeon_semaphore *semaphore,
3651                              bool emit_wait)
3652 {
3653         uint64_t addr = semaphore->gpu_addr;
3654         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3655
3656         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3657         radeon_ring_write(ring, lower_32_bits(addr));
3658         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3659
3660         if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3661                 /* Prevent the PFP from running ahead of the semaphore wait */
3662                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3663                 radeon_ring_write(ring, 0x0);
3664         }
3665
3666         return true;
3667 }
3668
3669 /**
3670  * cik_copy_cpdma - copy pages using the CP DMA engine
3671  *
3672  * @rdev: radeon_device pointer
3673  * @src_offset: src GPU address
3674  * @dst_offset: dst GPU address
3675  * @num_gpu_pages: number of GPU pages to xfer
3676  * @resv: reservation object to sync to
3677  *
3678  * Copy GPU paging using the CP DMA engine (CIK+).
3679  * Used by the radeon ttm implementation to move pages if
3680  * registered as the asic copy callback.
3681  */
3682 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3683                                     uint64_t src_offset, uint64_t dst_offset,
3684                                     unsigned num_gpu_pages,
3685                                     struct reservation_object *resv)
3686 {
3687         struct radeon_fence *fence;
3688         struct radeon_sync sync;
3689         int ring_index = rdev->asic->copy.blit_ring_index;
3690         struct radeon_ring *ring = &rdev->ring[ring_index];
3691         u32 size_in_bytes, cur_size_in_bytes, control;
3692         int i, num_loops;
3693         int r = 0;
3694
3695         radeon_sync_create(&sync);
3696
3697         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3698         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3699         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3700         if (r) {
3701                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3702                 radeon_sync_free(rdev, &sync, NULL);
3703                 return ERR_PTR(r);
3704         }
3705
3706         radeon_sync_resv(rdev, &sync, resv, false);
3707         radeon_sync_rings(rdev, &sync, ring->idx);
3708
3709         for (i = 0; i < num_loops; i++) {
3710                 cur_size_in_bytes = size_in_bytes;
3711                 if (cur_size_in_bytes > 0x1fffff)
3712                         cur_size_in_bytes = 0x1fffff;
3713                 size_in_bytes -= cur_size_in_bytes;
3714                 control = 0;
3715                 if (size_in_bytes == 0)
3716                         control |= PACKET3_DMA_DATA_CP_SYNC;
3717                 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3718                 radeon_ring_write(ring, control);
3719                 radeon_ring_write(ring, lower_32_bits(src_offset));
3720                 radeon_ring_write(ring, upper_32_bits(src_offset));
3721                 radeon_ring_write(ring, lower_32_bits(dst_offset));
3722                 radeon_ring_write(ring, upper_32_bits(dst_offset));
3723                 radeon_ring_write(ring, cur_size_in_bytes);
3724                 src_offset += cur_size_in_bytes;
3725                 dst_offset += cur_size_in_bytes;
3726         }
3727
3728         r = radeon_fence_emit(rdev, &fence, ring->idx);
3729         if (r) {
3730                 radeon_ring_unlock_undo(rdev, ring);
3731                 radeon_sync_free(rdev, &sync, NULL);
3732                 return ERR_PTR(r);
3733         }
3734
3735         radeon_ring_unlock_commit(rdev, ring, false);
3736         radeon_sync_free(rdev, &sync, fence);
3737
3738         return fence;
3739 }
3740
3741 /*
3742  * IB stuff
3743  */
3744 /**
3745  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3746  *
3747  * @rdev: radeon_device pointer
3748  * @ib: radeon indirect buffer object
3749  *
3750  * Emits a DE (drawing engine) or CE (constant engine) IB
3751  * on the gfx ring.  IBs are usually generated by userspace
3752  * acceleration drivers and submitted to the kernel for
3753  * scheduling on the ring.  This function schedules the IB
3754  * on the gfx ring for execution by the GPU.
3755  */
3756 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3757 {
3758         struct radeon_ring *ring = &rdev->ring[ib->ring];
3759         unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3760         u32 header, control = INDIRECT_BUFFER_VALID;
3761
3762         if (ib->is_const_ib) {
3763                 /* set switch buffer packet before const IB */
3764                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3765                 radeon_ring_write(ring, 0);
3766
3767                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3768         } else {
3769                 u32 next_rptr;
3770                 if (ring->rptr_save_reg) {
3771                         next_rptr = ring->wptr + 3 + 4;
3772                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3773                         radeon_ring_write(ring, ((ring->rptr_save_reg -
3774                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
3775                         radeon_ring_write(ring, next_rptr);
3776                 } else if (rdev->wb.enabled) {
3777                         next_rptr = ring->wptr + 5 + 4;
3778                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3779                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3780                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3781                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3782                         radeon_ring_write(ring, next_rptr);
3783                 }
3784
3785                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3786         }
3787
3788         control |= ib->length_dw | (vm_id << 24);
3789
3790         radeon_ring_write(ring, header);
3791         radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3792         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3793         radeon_ring_write(ring, control);
3794 }
3795
3796 /**
3797  * cik_ib_test - basic gfx ring IB test
3798  *
3799  * @rdev: radeon_device pointer
3800  * @ring: radeon_ring structure holding ring information
3801  *
3802  * Allocate an IB and execute it on the gfx ring (CIK).
3803  * Provides a basic gfx ring test to verify that IBs are working.
3804  * Returns 0 on success, error on failure.
3805  */
3806 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3807 {
3808         struct radeon_ib ib;
3809         uint32_t scratch;
3810         uint32_t tmp = 0;
3811         unsigned i;
3812         int r;
3813
3814         r = radeon_scratch_get(rdev, &scratch);
3815         if (r) {
3816                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3817                 return r;
3818         }
3819         WREG32(scratch, 0xCAFEDEAD);
3820         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3821         if (r) {
3822                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3823                 radeon_scratch_free(rdev, scratch);
3824                 return r;
3825         }
3826         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3827         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3828         ib.ptr[2] = 0xDEADBEEF;
3829         ib.length_dw = 3;
3830         r = radeon_ib_schedule(rdev, &ib, NULL, false);
3831         if (r) {
3832                 radeon_scratch_free(rdev, scratch);
3833                 radeon_ib_free(rdev, &ib);
3834                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3835                 return r;
3836         }
3837         r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3838                 RADEON_USEC_IB_TEST_TIMEOUT));
3839         if (r < 0) {
3840                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3841                 radeon_scratch_free(rdev, scratch);
3842                 radeon_ib_free(rdev, &ib);
3843                 return r;
3844         } else if (r == 0) {
3845                 DRM_ERROR("radeon: fence wait timed out.\n");
3846                 radeon_scratch_free(rdev, scratch);
3847                 radeon_ib_free(rdev, &ib);
3848                 return -ETIMEDOUT;
3849         }
3850         r = 0;
3851         for (i = 0; i < rdev->usec_timeout; i++) {
3852                 tmp = RREG32(scratch);
3853                 if (tmp == 0xDEADBEEF)
3854                         break;
3855                 DRM_UDELAY(1);
3856         }
3857         if (i < rdev->usec_timeout) {
3858                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3859         } else {
3860                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3861                           scratch, tmp);
3862                 r = -EINVAL;
3863         }
3864         radeon_scratch_free(rdev, scratch);
3865         radeon_ib_free(rdev, &ib);
3866         return r;
3867 }
3868
3869 /*
3870  * CP.
 * On CIK, gfx and compute now have independent command processors.
3872  *
3873  * GFX
3874  * Gfx consists of a single ring and can process both gfx jobs and
3875  * compute jobs.  The gfx CP consists of three microengines (ME):
3876  * PFP - Pre-Fetch Parser
3877  * ME - Micro Engine
3878  * CE - Constant Engine
3879  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3881  * used by the DE so that they can be loaded into cache in parallel
3882  * while the DE is processing state update packets.
3883  *
3884  * Compute
3885  * The compute CP consists of two microengines (ME):
3886  * MEC1 - Compute MicroEngine 1
3887  * MEC2 - Compute MicroEngine 2
3888  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3889  * The queues are exposed to userspace and are programmed directly
3890  * by the compute runtime.
3891  */
3892 /**
3893  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3894  *
3895  * @rdev: radeon_device pointer
3896  * @enable: enable or disable the MEs
3897  *
3898  * Halts or unhalts the gfx MEs.
3899  */
3900 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3901 {
3902         if (enable)
3903                 WREG32(CP_ME_CNTL, 0);
3904         else {
3905                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3906                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3907                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3908                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3909         }
3910         udelay(50);
3911 }
3912
3913 /**
3914  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3915  *
3916  * @rdev: radeon_device pointer
3917  *
3918  * Loads the gfx PFP, ME, and CE ucode.
3919  * Returns 0 for success, -EINVAL if the ucode is not available.
3920  */
3921 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3922 {
3923         int i;
3924
3925         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3926                 return -EINVAL;
3927
3928         cik_cp_gfx_enable(rdev, false);
3929
3930         if (rdev->new_fw) {
3931                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3932                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3933                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3934                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3935                 const struct gfx_firmware_header_v1_0 *me_hdr =
3936                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3937                 const __le32 *fw_data;
3938                 u32 fw_size;
3939
3940                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3941                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3942                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3943
3944                 /* PFP */
3945                 fw_data = (const __le32 *)
3946                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3947                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3948                 WREG32(CP_PFP_UCODE_ADDR, 0);
3949                 for (i = 0; i < fw_size; i++)
3950                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3951                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3952
3953                 /* CE */
3954                 fw_data = (const __le32 *)
3955                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3956                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3957                 WREG32(CP_CE_UCODE_ADDR, 0);
3958                 for (i = 0; i < fw_size; i++)
3959                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3960                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3961
3962                 /* ME */
3963                 fw_data = (const __be32 *)
3964                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3965                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3966                 WREG32(CP_ME_RAM_WADDR, 0);
3967                 for (i = 0; i < fw_size; i++)
3968                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3969                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3970                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3971         } else {
3972                 const __be32 *fw_data;
3973
3974                 /* PFP */
3975                 fw_data = (const __be32 *)rdev->pfp_fw->data;
3976                 WREG32(CP_PFP_UCODE_ADDR, 0);
3977                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3978                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3979                 WREG32(CP_PFP_UCODE_ADDR, 0);
3980
3981                 /* CE */
3982                 fw_data = (const __be32 *)rdev->ce_fw->data;
3983                 WREG32(CP_CE_UCODE_ADDR, 0);
3984                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3985                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3986                 WREG32(CP_CE_UCODE_ADDR, 0);
3987
3988                 /* ME */
3989                 fw_data = (const __be32 *)rdev->me_fw->data;
3990                 WREG32(CP_ME_RAM_WADDR, 0);
3991                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3992                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3993                 WREG32(CP_ME_RAM_WADDR, 0);
3994         }
3995
3996         return 0;
3997 }
3998
3999 /**
4000  * cik_cp_gfx_start - start the gfx ring
4001  *
4002  * @rdev: radeon_device pointer
4003  *
4004  * Enables the ring and loads the clear state context and other
4005  * packets required to init the ring.
4006  * Returns 0 for success, error for failure.
4007  */
4008 static int cik_cp_gfx_start(struct radeon_device *rdev)
4009 {
4010         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4011         int r, i;
4012
4013         /* init the CP */
4014         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4015         WREG32(CP_ENDIAN_SWAP, 0);
4016         WREG32(CP_DEVICE_ID, 1);
4017
4018         cik_cp_gfx_enable(rdev, true);
4019
4020         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4021         if (r) {
4022                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4023                 return r;
4024         }
4025
4026         /* init the CE partitions.  CE only used for gfx on CIK */
4027         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4028         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4029         radeon_ring_write(ring, 0x8000);
4030         radeon_ring_write(ring, 0x8000);
4031
4032         /* setup clear context state */
4033         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4034         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4035
4036         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4037         radeon_ring_write(ring, 0x80000000);
4038         radeon_ring_write(ring, 0x80000000);
4039
4040         for (i = 0; i < cik_default_size; i++)
4041                 radeon_ring_write(ring, cik_default_state[i]);
4042
4043         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4044         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4045
4046         /* set clear context state */
4047         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4048         radeon_ring_write(ring, 0);
4049
4050         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4051         radeon_ring_write(ring, 0x00000316);
4052         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4053         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4054
4055         radeon_ring_unlock_commit(rdev, ring, false);
4056
4057         return 0;
4058 }
4059
4060 /**
4061  * cik_cp_gfx_fini - stop the gfx ring
4062  *
4063  * @rdev: radeon_device pointer
4064  *
4065  * Stop the gfx ring and tear down the driver ring
4066  * info.
4067  */
4068 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4069 {
4070         cik_cp_gfx_enable(rdev, false);
4071         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4072 }
4073
4074 /**
4075  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4076  *
4077  * @rdev: radeon_device pointer
4078  *
4079  * Program the location and size of the gfx ring buffer
4080  * and test it to make sure it's working.
4081  * Returns 0 for success, error for failure.
4082  */
4083 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4084 {
4085         struct radeon_ring *ring;
4086         u32 tmp;
4087         u32 rb_bufsz;
4088         u64 rb_addr;
4089         int r;
4090
4091         WREG32(CP_SEM_WAIT_TIMER, 0x0);
4092         if (rdev->family != CHIP_HAWAII)
4093                 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4094
4095         /* Set the write pointer delay */
4096         WREG32(CP_RB_WPTR_DELAY, 0);
4097
4098         /* set the RB to use vmid 0 */
4099         WREG32(CP_RB_VMID, 0);
4100
4101         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4102
4103         /* ring 0 - compute and gfx */
4104         /* Set ring buffer size */
4105         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4106         rb_bufsz = order_base_2(ring->ring_size / 8);
4107         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4108 #ifdef __BIG_ENDIAN
4109         tmp |= BUF_SWAP_32BIT;
4110 #endif
4111         WREG32(CP_RB0_CNTL, tmp);
4112
4113         /* Initialize the ring buffer's read and write pointers */
4114         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4115         ring->wptr = 0;
4116         WREG32(CP_RB0_WPTR, ring->wptr);
4117
4118         /* set the wb address wether it's enabled or not */
4119         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4120         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4121
4122         /* scratch register shadowing is no longer supported */
4123         WREG32(SCRATCH_UMSK, 0);
4124
4125         if (!rdev->wb.enabled)
4126                 tmp |= RB_NO_UPDATE;
4127
4128         mdelay(1);
4129         WREG32(CP_RB0_CNTL, tmp);
4130
4131         rb_addr = ring->gpu_addr >> 8;
4132         WREG32(CP_RB0_BASE, rb_addr);
4133         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4134
4135         /* start the ring */
4136         cik_cp_gfx_start(rdev);
4137         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4138         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4139         if (r) {
4140                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4141                 return r;
4142         }
4143
4144         if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4145                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4146
4147         return 0;
4148 }
4149
4150 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4151                      struct radeon_ring *ring)
4152 {
4153         u32 rptr;
4154
4155         if (rdev->wb.enabled)
4156                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4157         else
4158                 rptr = RREG32(CP_RB0_RPTR);
4159
4160         return rptr;
4161 }
4162
4163 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4164                      struct radeon_ring *ring)
4165 {
4166         return RREG32(CP_RB0_WPTR);
4167 }
4168
/* Commit the gfx ring write pointer to the hardware. */
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* read back to flush the posted write */
	(void)RREG32(CP_RB0_WPTR);
}
4175
/* Fetch a compute ring's read pointer: from the writeback buffer when
 * enabled, otherwise from the ring's HQD registers via SRBM select.
 */
u32 cik_compute_get_rptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled) {
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	} else {
		/* HQD registers are banked per me/pipe/queue; select this
		 * ring's bank under the srbm mutex, read, then deselect.
		 */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		rptr = RREG32(CP_HQD_PQ_RPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return rptr;
}
4193
/* Fetch a compute ring's write pointer: from the writeback buffer when
 * enabled, otherwise from the ring's HQD registers via SRBM select.
 */
u32 cik_compute_get_wptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 wptr;

	if (rdev->wb.enabled) {
		/* XXX check if swapping is necessary on BE */
		wptr = rdev->wb.wb[ring->wptr_offs/4];
	} else {
		/* select this ring's HQD bank under the srbm mutex */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		wptr = RREG32(CP_HQD_PQ_WPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return wptr;
}
4212
/* Publish a compute ring's write pointer: store it in the writeback
 * buffer first, then ring the queue's doorbell to notify the MEC.
 */
void cik_compute_set_wptr(struct radeon_device *rdev,
                          struct radeon_ring *ring)
{
        /* XXX check if swapping is necessary on BE */
        rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
        WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4220
/*
 * cik_compute_stop - quiesce one compute hardware queue (HQD)
 *
 * Disables wptr polling, dequeues the HQD if it is active (bounded
 * wait of rdev->usec_timeout microseconds), and clears its pointers.
 * Caller must hold rdev->srbm_mutex (see cik_cp_compute_enable());
 * SRBM selection is restored to 0/0/0/0 before returning.
 */
static void cik_compute_stop(struct radeon_device *rdev,
                             struct radeon_ring *ring)
{
        u32 j, tmp;

        /* point SRBM-indexed registers at this ring's me/pipe/queue */
        cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
        /* Disable wptr polling. */
        tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
        tmp &= ~WPTR_POLL_EN;
        WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
        /* Disable HQD. */
        if (RREG32(CP_HQD_ACTIVE) & 1) {
                WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
                /* wait for the queue to go inactive */
                for (j = 0; j < rdev->usec_timeout; j++) {
                        if (!(RREG32(CP_HQD_ACTIVE) & 1))
                                break;
                        udelay(1);
                }
                WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
                WREG32(CP_HQD_PQ_RPTR, 0);
                WREG32(CP_HQD_PQ_WPTR, 0);
        }
        /* restore default SRBM selection */
        cik_srbm_select(rdev, 0, 0, 0, 0);
}
4245
4246 /**
4247  * cik_cp_compute_enable - enable/disable the compute CP MEs
4248  *
4249  * @rdev: radeon_device pointer
4250  * @enable: enable or disable the MEs
4251  *
4252  * Halts or unhalts the compute MEs.
4253  */
4254 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4255 {
4256         if (enable)
4257                 WREG32(CP_MEC_CNTL, 0);
4258         else {
4259                 /*
4260                  * To make hibernation reliable we need to clear compute ring
4261                  * configuration before halting the compute ring.
4262                  */
4263                 mutex_lock(&rdev->srbm_mutex);
4264                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4265                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4266                 mutex_unlock(&rdev->srbm_mutex);
4267
4268                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4269                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4270                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4271         }
4272         udelay(50);
4273 }
4274
4275 /**
4276  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4277  *
4278  * @rdev: radeon_device pointer
4279  *
4280  * Loads the compute MEC1&2 ucode.
4281  * Returns 0 for success, -EINVAL if the ucode is not available.
4282  */
4283 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4284 {
4285         int i;
4286
4287         if (!rdev->mec_fw)
4288                 return -EINVAL;
4289
4290         cik_cp_compute_enable(rdev, false);
4291
4292         if (rdev->new_fw) {
4293                 const struct gfx_firmware_header_v1_0 *mec_hdr =
4294                         (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4295                 const __le32 *fw_data;
4296                 u32 fw_size;
4297
4298                 radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4299
4300                 /* MEC1 */
4301                 fw_data = (const __le32 *)
4302                         (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4303                 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4304                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4305                 for (i = 0; i < fw_size; i++)
4306                         WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4307                 WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4308
4309                 /* MEC2 */
4310                 if (rdev->family == CHIP_KAVERI) {
4311                         const struct gfx_firmware_header_v1_0 *mec2_hdr =
4312                                 (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4313
4314                         fw_data = (const __le32 *)
4315                                 (rdev->mec2_fw->data +
4316                                  le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4317                         fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4318                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4319                         for (i = 0; i < fw_size; i++)
4320                                 WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4321                         WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4322                 }
4323         } else {
4324                 const __be32 *fw_data;
4325
4326                 /* MEC1 */
4327                 fw_data = (const __be32 *)rdev->mec_fw->data;
4328                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4329                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4330                         WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4331                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4332
4333                 if (rdev->family == CHIP_KAVERI) {
4334                         /* MEC2 */
4335                         fw_data = (const __be32 *)rdev->mec_fw->data;
4336                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4337                         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4338                                 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4339                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4340                 }
4341         }
4342
4343         return 0;
4344 }
4345
4346 /**
4347  * cik_cp_compute_start - start the compute queues
4348  *
4349  * @rdev: radeon_device pointer
4350  *
4351  * Enable the compute queues.
4352  * Returns 0 for success, error for failure.
4353  */
4354 static int cik_cp_compute_start(struct radeon_device *rdev)
4355 {
4356         cik_cp_compute_enable(rdev, true);
4357
4358         return 0;
4359 }
4360
4361 /**
4362  * cik_cp_compute_fini - stop the compute queues
4363  *
4364  * @rdev: radeon_device pointer
4365  *
4366  * Stop the compute queues and tear down the driver queue
4367  * info.
4368  */
4369 static void cik_cp_compute_fini(struct radeon_device *rdev)
4370 {
4371         int i, idx, r;
4372
4373         cik_cp_compute_enable(rdev, false);
4374
4375         for (i = 0; i < 2; i++) {
4376                 if (i == 0)
4377                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4378                 else
4379                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4380
4381                 if (rdev->ring[idx].mqd_obj) {
4382                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4383                         if (unlikely(r != 0))
4384                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4385
4386                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4387                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4388
4389                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4390                         rdev->ring[idx].mqd_obj = NULL;
4391                 }
4392         }
4393 }
4394
4395 static void cik_mec_fini(struct radeon_device *rdev)
4396 {
4397         int r;
4398
4399         if (rdev->mec.hpd_eop_obj) {
4400                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4401                 if (unlikely(r != 0))
4402                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4403                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4404                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4405
4406                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4407                 rdev->mec.hpd_eop_obj = NULL;
4408         }
4409 }
4410
4411 #define MEC_HPD_SIZE 2048
4412
/*
 * cik_mec_init - allocate the MEC HPD EOP buffer
 *
 * Records the MEC/pipe/queue topology for the chip and allocates,
 * pins and zeroes a GTT buffer object holding MEC_HPD_SIZE * 2 bytes
 * per pipe; the per-pipe slices are programmed into the CP_HPD_EOP_*
 * registers by cik_cp_compute_resume().
 * Returns 0 on success, negative error code on failure (the object
 * is cleaned up via cik_mec_fini() on the error paths).
 */
static int cik_mec_init(struct radeon_device *rdev)
{
        int r;
        u32 *hpd;

        /*
         * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
         * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
         */
        if (rdev->family == CHIP_KAVERI)
                rdev->mec.num_mec = 2;
        else
                rdev->mec.num_mec = 1;
        rdev->mec.num_pipe = 4;
        rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

        if (rdev->mec.hpd_eop_obj == NULL) {
                /* NOTE(review): the log strings below say "HDP EOP" but the
                 * object is the HPD EOP buffer — looks like a typo kept for
                 * log-grep compatibility. */
                r = radeon_bo_create(rdev,
                                     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
                                     PAGE_SIZE, true,
                                     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
                                     &rdev->mec.hpd_eop_obj);
                if (r) {
                        dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
                        return r;
                }
        }

        r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
        if (unlikely(r != 0)) {
                cik_mec_fini(rdev);
                return r;
        }
        r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
                          &rdev->mec.hpd_eop_gpu_addr);
        if (r) {
                dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
                cik_mec_fini(rdev);
                return r;
        }
        r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
        if (r) {
                dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
                cik_mec_fini(rdev);
                return r;
        }

        /* clear memory.  Not sure if this is required or not */
        memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

        radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
        radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

        return 0;
}
4468
/*
 * CPU-side copy of the CP_MQD_* / CP_HQD_* register state kept inside
 * the MQD for one compute hardware queue.  cik_cp_compute_resume()
 * fills these fields and writes them to the matching registers; do
 * not reorder or resize — the layout is part of struct bonaire_mqd.
 */
struct hqd_registers
{
        u32 cp_mqd_base_addr;
        u32 cp_mqd_base_addr_hi;
        u32 cp_hqd_active;
        u32 cp_hqd_vmid;
        u32 cp_hqd_persistent_state;
        u32 cp_hqd_pipe_priority;
        u32 cp_hqd_queue_priority;
        u32 cp_hqd_quantum;
        u32 cp_hqd_pq_base;
        u32 cp_hqd_pq_base_hi;
        u32 cp_hqd_pq_rptr;
        u32 cp_hqd_pq_rptr_report_addr;
        u32 cp_hqd_pq_rptr_report_addr_hi;
        u32 cp_hqd_pq_wptr_poll_addr;
        u32 cp_hqd_pq_wptr_poll_addr_hi;
        u32 cp_hqd_pq_doorbell_control;
        u32 cp_hqd_pq_wptr;
        u32 cp_hqd_pq_control;
        u32 cp_hqd_ib_base_addr;
        u32 cp_hqd_ib_base_addr_hi;
        u32 cp_hqd_ib_rptr;
        u32 cp_hqd_ib_control;
        u32 cp_hqd_iq_timer;
        u32 cp_hqd_iq_rptr;
        u32 cp_hqd_dequeue_request;
        u32 cp_hqd_dma_offload;
        u32 cp_hqd_sema_cmd;
        u32 cp_hqd_msg_type;
        u32 cp_hqd_atomic0_preop_lo;
        u32 cp_hqd_atomic0_preop_hi;
        u32 cp_hqd_atomic1_preop_lo;
        u32 cp_hqd_atomic1_preop_hi;
        u32 cp_hqd_hq_scheduler0;
        u32 cp_hqd_hq_scheduler1;
        u32 cp_mqd_control;
};
4507
/*
 * Memory Queue Descriptor (MQD) layout for CIK compute queues.  One
 * instance per compute ring is placed in a GTT buffer object by
 * cik_cp_compute_resume(); its GPU address is handed to the CP via
 * CP_MQD_BASE_ADDR.  Do not reorder or resize the fields — the layout
 * is consumed by the hardware/firmware, not just the driver.
 */
struct bonaire_mqd
{
        u32 header;
        u32 dispatch_initiator;
        u32 dimensions[3];
        u32 start_idx[3];
        u32 num_threads[3];
        u32 pipeline_stat_enable;
        u32 perf_counter_enable;
        u32 pgm[2];
        u32 tba[2];
        u32 tma[2];
        u32 pgm_rsrc[2];
        u32 vmid;
        u32 resource_limits;
        u32 static_thread_mgmt01[2];
        u32 tmp_ring_size;
        u32 static_thread_mgmt23[2];
        u32 restart[3];
        u32 thread_trace_enable;
        u32 reserved1;
        u32 user_data[16];
        u32 vgtcs_invoke_count[2];
        /* register snapshot written out to CP_HQD_*/CP_MQD_* */
        struct hqd_registers queue_state;
        u32 dequeue_cntr;
        u32 interrupt_queue[64];
};
4535
4536 /**
4537  * cik_cp_compute_resume - setup the compute queue registers
4538  *
4539  * @rdev: radeon_device pointer
4540  *
4541  * Program the compute queues and test them to make sure they
4542  * are working.
4543  * Returns 0 for success, error for failure.
4544  */
4545 static int cik_cp_compute_resume(struct radeon_device *rdev)
4546 {
4547         int r, i, j, idx;
4548         u32 tmp;
4549         bool use_doorbell = true;
4550         u64 hqd_gpu_addr;
4551         u64 mqd_gpu_addr;
4552         u64 eop_gpu_addr;
4553         u64 wb_gpu_addr;
4554         u32 *buf;
4555         struct bonaire_mqd *mqd;
4556
4557         r = cik_cp_compute_start(rdev);
4558         if (r)
4559                 return r;
4560
4561         /* fix up chicken bits */
4562         tmp = RREG32(CP_CPF_DEBUG);
4563         tmp |= (1 << 23);
4564         WREG32(CP_CPF_DEBUG, tmp);
4565
4566         /* init the pipes */
4567         mutex_lock(&rdev->srbm_mutex);
4568
4569         for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); ++i) {
4570                 int me = (i < 4) ? 1 : 2;
4571                 int pipe = (i < 4) ? i : (i - 4);
4572
4573                 cik_srbm_select(rdev, me, pipe, 0, 0);
4574
4575                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
4576                 /* write the EOP addr */
4577                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4578                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4579
4580                 /* set the VMID assigned */
4581                 WREG32(CP_HPD_EOP_VMID, 0);
4582
4583                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4584                 tmp = RREG32(CP_HPD_EOP_CONTROL);
4585                 tmp &= ~EOP_SIZE_MASK;
4586                 tmp |= order_base_2(MEC_HPD_SIZE / 8);
4587                 WREG32(CP_HPD_EOP_CONTROL, tmp);
4588
4589         }
4590         cik_srbm_select(rdev, 0, 0, 0, 0);
4591         mutex_unlock(&rdev->srbm_mutex);
4592
4593         /* init the queues.  Just two for now. */
4594         for (i = 0; i < 2; i++) {
4595                 if (i == 0)
4596                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4597                 else
4598                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4599
4600                 if (rdev->ring[idx].mqd_obj == NULL) {
4601                         r = radeon_bo_create(rdev,
4602                                              sizeof(struct bonaire_mqd),
4603                                              PAGE_SIZE, true,
4604                                              RADEON_GEM_DOMAIN_GTT, 0, NULL,
4605                                              NULL, &rdev->ring[idx].mqd_obj);
4606                         if (r) {
4607                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4608                                 return r;
4609                         }
4610                 }
4611
4612                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4613                 if (unlikely(r != 0)) {
4614                         cik_cp_compute_fini(rdev);
4615                         return r;
4616                 }
4617                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4618                                   &mqd_gpu_addr);
4619                 if (r) {
4620                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4621                         cik_cp_compute_fini(rdev);
4622                         return r;
4623                 }
4624                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4625                 if (r) {
4626                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4627                         cik_cp_compute_fini(rdev);
4628                         return r;
4629                 }
4630
4631                 /* init the mqd struct */
4632                 memset(buf, 0, sizeof(struct bonaire_mqd));
4633
4634                 mqd = (struct bonaire_mqd *)buf;
4635                 mqd->header = 0xC0310800;
4636                 mqd->static_thread_mgmt01[0] = 0xffffffff;
4637                 mqd->static_thread_mgmt01[1] = 0xffffffff;
4638                 mqd->static_thread_mgmt23[0] = 0xffffffff;
4639                 mqd->static_thread_mgmt23[1] = 0xffffffff;
4640
4641                 mutex_lock(&rdev->srbm_mutex);
4642                 cik_srbm_select(rdev, rdev->ring[idx].me,
4643                                 rdev->ring[idx].pipe,
4644                                 rdev->ring[idx].queue, 0);
4645
4646                 /* disable wptr polling */
4647                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4648                 tmp &= ~WPTR_POLL_EN;
4649                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4650
4651                 /* enable doorbell? */
4652                 mqd->queue_state.cp_hqd_pq_doorbell_control =
4653                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4654                 if (use_doorbell)
4655                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4656                 else
4657                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4658                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4659                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4660
4661                 /* disable the queue if it's active */
4662                 mqd->queue_state.cp_hqd_dequeue_request = 0;
4663                 mqd->queue_state.cp_hqd_pq_rptr = 0;
4664                 mqd->queue_state.cp_hqd_pq_wptr= 0;
4665                 if (RREG32(CP_HQD_ACTIVE) & 1) {
4666                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4667                         for (j = 0; j < rdev->usec_timeout; j++) {
4668                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4669                                         break;
4670                                 udelay(1);
4671                         }
4672                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4673                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4674                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4675                 }
4676
4677                 /* set the pointer to the MQD */
4678                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4679                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4680                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4681                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4682                 /* set MQD vmid to 0 */
4683                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4684                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4685                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4686
4687                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4688                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4689                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4690                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4691                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4692                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4693
4694                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4695                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4696                 mqd->queue_state.cp_hqd_pq_control &=
4697                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4698
4699                 mqd->queue_state.cp_hqd_pq_control |=
4700                         order_base_2(rdev->ring[idx].ring_size / 8);
4701                 mqd->queue_state.cp_hqd_pq_control |=
4702                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4703 #ifdef __BIG_ENDIAN
4704                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4705 #endif
4706                 mqd->queue_state.cp_hqd_pq_control &=
4707                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4708                 mqd->queue_state.cp_hqd_pq_control |=
4709                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4710                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4711
4712                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4713                 if (i == 0)
4714                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4715                 else
4716                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4717                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4718                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4719                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4720                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4721                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4722
4723                 /* set the wb address wether it's enabled or not */
4724                 if (i == 0)
4725                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4726                 else
4727                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4728                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4729                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4730                         upper_32_bits(wb_gpu_addr) & 0xffff;
4731                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4732                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4733                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4734                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4735
4736                 /* enable the doorbell if requested */
4737                 if (use_doorbell) {
4738                         mqd->queue_state.cp_hqd_pq_doorbell_control =
4739                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4740                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4741                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
4742                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4743                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4744                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
4745                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
4746
4747                 } else {
4748                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4749                 }
4750                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4751                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4752
4753                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4754                 rdev->ring[idx].wptr = 0;
4755                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4756                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4757                 mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4758
4759                 /* set the vmid for the queue */
4760                 mqd->queue_state.cp_hqd_vmid = 0;
4761                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4762
4763                 /* activate the queue */
4764                 mqd->queue_state.cp_hqd_active = 1;
4765                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4766
4767                 cik_srbm_select(rdev, 0, 0, 0, 0);
4768                 mutex_unlock(&rdev->srbm_mutex);
4769
4770                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4771                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4772
4773                 rdev->ring[idx].ready = true;
4774                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4775                 if (r)
4776                         rdev->ring[idx].ready = false;
4777         }
4778
4779         return 0;
4780 }
4781
/* Enable or disable both the gfx and compute command processors. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
        cik_cp_gfx_enable(rdev, enable);
        cik_cp_compute_enable(rdev, enable);
}
4787
/* Load the gfx and compute CP microcode.  Returns 0 on success or the
 * first error encountered; compute ucode is only loaded if the gfx
 * ucode loaded cleanly.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
        int r;

        r = cik_cp_gfx_load_microcode(rdev);
        if (!r)
                r = cik_cp_compute_load_microcode(rdev);

        return r;
}
4801
/* Tear down both command processors: gfx ring first, then compute. */
static void cik_cp_fini(struct radeon_device *rdev)
{
        cik_cp_gfx_fini(rdev);
        cik_cp_compute_fini(rdev);
}
4807
4808 static int cik_cp_resume(struct radeon_device *rdev)
4809 {
4810         int r;
4811
4812         cik_enable_gui_idle_interrupt(rdev, false);
4813
4814         r = cik_cp_load_microcode(rdev);
4815         if (r)
4816                 return r;
4817
4818         r = cik_cp_gfx_resume(rdev);
4819         if (r)
4820                 return r;
4821         r = cik_cp_compute_resume(rdev);
4822         if (r)
4823                 return r;
4824
4825         cik_enable_gui_idle_interrupt(rdev, true);
4826
4827         return 0;
4828 }
4829
/* Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log.
 * Called from the soft-reset path (see cik_gpu_soft_reset()) to aid
 * hang diagnosis.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
        dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
                RREG32(GRBM_STATUS));
        dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
                RREG32(GRBM_STATUS2));
        dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
                RREG32(GRBM_STATUS_SE0));
        dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
                RREG32(GRBM_STATUS_SE1));
        dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
                RREG32(GRBM_STATUS_SE2));
        dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
                RREG32(GRBM_STATUS_SE3));
        dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
                RREG32(SRBM_STATUS));
        dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
                RREG32(SRBM_STATUS2));
        dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
                RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
        dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
                 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
        dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
        dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
                 RREG32(CP_STALLED_STAT1));
        dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
                 RREG32(CP_STALLED_STAT2));
        dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
                 RREG32(CP_STALLED_STAT3));
        dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
                 RREG32(CP_CPF_BUSY_STAT));
        dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
                 RREG32(CP_CPF_STALLED_STAT1));
        dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
        dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
        dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
                 RREG32(CP_CPC_STALLED_STAT1));
        dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4869
4870 /**
4871  * cik_gpu_check_soft_reset - check which blocks are busy
4872  *
4873  * @rdev: radeon_device pointer
4874  *
4875  * Check which blocks are busy and return the relevant reset
4876  * mask to be used by cik_gpu_soft_reset().
4877  * Returns a mask of the blocks to be reset.
4878  */
4879 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4880 {
4881         u32 reset_mask = 0;
4882         u32 tmp;
4883
4884         /* GRBM_STATUS */
4885         tmp = RREG32(GRBM_STATUS);
4886         if (tmp & (PA_BUSY | SC_BUSY |
4887                    BCI_BUSY | SX_BUSY |
4888                    TA_BUSY | VGT_BUSY |
4889                    DB_BUSY | CB_BUSY |
4890                    GDS_BUSY | SPI_BUSY |
4891                    IA_BUSY | IA_BUSY_NO_DMA))
4892                 reset_mask |= RADEON_RESET_GFX;
4893
4894         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4895                 reset_mask |= RADEON_RESET_CP;
4896
4897         /* GRBM_STATUS2 */
4898         tmp = RREG32(GRBM_STATUS2);
4899         if (tmp & RLC_BUSY)
4900                 reset_mask |= RADEON_RESET_RLC;
4901
4902         /* SDMA0_STATUS_REG */
4903         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4904         if (!(tmp & SDMA_IDLE))
4905                 reset_mask |= RADEON_RESET_DMA;
4906
4907         /* SDMA1_STATUS_REG */
4908         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4909         if (!(tmp & SDMA_IDLE))
4910                 reset_mask |= RADEON_RESET_DMA1;
4911
4912         /* SRBM_STATUS2 */
4913         tmp = RREG32(SRBM_STATUS2);
4914         if (tmp & SDMA_BUSY)
4915                 reset_mask |= RADEON_RESET_DMA;
4916
4917         if (tmp & SDMA1_BUSY)
4918                 reset_mask |= RADEON_RESET_DMA1;
4919
4920         /* SRBM_STATUS */
4921         tmp = RREG32(SRBM_STATUS);
4922
4923         if (tmp & IH_BUSY)
4924                 reset_mask |= RADEON_RESET_IH;
4925
4926         if (tmp & SEM_BUSY)
4927                 reset_mask |= RADEON_RESET_SEM;
4928
4929         if (tmp & GRBM_RQ_PENDING)
4930                 reset_mask |= RADEON_RESET_GRBM;
4931
4932         if (tmp & VMC_BUSY)
4933                 reset_mask |= RADEON_RESET_VMC;
4934
4935         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4936                    MCC_BUSY | MCD_BUSY))
4937                 reset_mask |= RADEON_RESET_MC;
4938
4939         if (evergreen_is_display_hung(rdev))
4940                 reset_mask |= RADEON_RESET_DISPLAY;
4941
4942         /* Skip MC reset as it's mostly likely not hung, just busy */
4943         if (reset_mask & RADEON_RESET_MC) {
4944                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4945                 reset_mask &= ~RADEON_RESET_MC;
4946         }
4947
4948         return reset_mask;
4949 }
4950
/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset (RADEON_RESET_* flags)
 *
 * Soft reset the blocks specified in @reset_mask.  Halts the CP,
 * MEC, RLC and (if flagged) the SDMA engines, quiesces the memory
 * controller, then pulses the corresponding GRBM/SRBM soft reset
 * bits before resuming the MC.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* nothing flagged as hung, nothing to do */
	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* dump state for debugging before we start tearing things down */
	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* quiesce memory traffic before touching the reset registers */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the hung-block mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* MC soft reset is only applied on discrete parts, not IGPs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	if (grbm_soft_reset) {
		/* assert the reset bits, read back to post the write,
		 * hold briefly, then deassert and read back again */
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		/* same assert/hold/deassert sequence for the SRBM blocks */
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}
5081
/* GMCON register state saved across a KV/IGP pci config reset */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
5087
/**
 * kv_save_regs_for_reset - save GMCON state before a pci config reset
 *
 * @rdev: radeon_device pointer
 * @save: structure receiving the current GMCON register values
 *
 * Saves GMCON_RENG_EXECUTE, GMCON_MISC and GMCON_MISC3, then clears
 * the RENG execute-on-power-up / execute-on-reg-update bits and
 * stutter enable so those paths stay quiet across the reset
 * (KV/IGP only; see cik_gpu_pci_config_reset()).
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5099
5100 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5101                                       struct kv_reset_save_regs *save)
5102 {
5103         int i;
5104
5105         WREG32(GMCON_PGFSM_WRITE, 0);
5106         WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5107
5108         for (i = 0; i < 5; i++)
5109                 WREG32(GMCON_PGFSM_WRITE, 0);
5110
5111         WREG32(GMCON_PGFSM_WRITE, 0);
5112         WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5113
5114         for (i = 0; i < 5; i++)
5115                 WREG32(GMCON_PGFSM_WRITE, 0);
5116
5117         WREG32(GMCON_PGFSM_WRITE, 0x210000);
5118         WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5119
5120         for (i = 0; i < 5; i++)
5121                 WREG32(GMCON_PGFSM_WRITE, 0);
5122
5123         WREG32(GMCON_PGFSM_WRITE, 0x21003);
5124         WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5125
5126         for (i = 0; i < 5; i++)
5127                 WREG32(GMCON_PGFSM_WRITE, 0);
5128
5129         WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5130         WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5131
5132         for (i = 0; i < 5; i++)
5133                 WREG32(GMCON_PGFSM_WRITE, 0);
5134
5135         WREG32(GMCON_PGFSM_WRITE, 0);
5136         WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5137
5138         for (i = 0; i < 5; i++)
5139                 WREG32(GMCON_PGFSM_WRITE, 0);
5140
5141         WREG32(GMCON_PGFSM_WRITE, 0x420000);
5142         WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5143
5144         for (i = 0; i < 5; i++)
5145                 WREG32(GMCON_PGFSM_WRITE, 0);
5146
5147         WREG32(GMCON_PGFSM_WRITE, 0x120202);
5148         WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5149
5150         for (i = 0; i < 5; i++)
5151                 WREG32(GMCON_PGFSM_WRITE, 0);
5152
5153         WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5154         WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5155
5156         for (i = 0; i < 5; i++)
5157                 WREG32(GMCON_PGFSM_WRITE, 0);
5158
5159         WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5160         WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5161
5162         for (i = 0; i < 5; i++)
5163                 WREG32(GMCON_PGFSM_WRITE, 0);
5164
5165         WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5166         WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5167
5168         WREG32(GMCON_MISC3, save->gmcon_misc3);
5169         WREG32(GMCON_MISC, save->gmcon_misc);
5170         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5171 }
5172
/**
 * cik_gpu_pci_config_reset - full pci config reset of the GPU
 *
 * @rdev: radeon_device pointer
 *
 * Halts all engines, quiesces the memory controller, then resets
 * the asic through pci config space.  Heavier hammer than
 * cik_gpu_soft_reset().  On IGPs (KV) GMCON state is saved and
 * restored around the reset.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* IGPs need their GMCON state preserved across the reset */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset; CONFIG_MEMSIZE reads back
	 * as all ones while the asic is still held in reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5235
5236 /**
5237  * cik_asic_reset - soft reset GPU
5238  *
5239  * @rdev: radeon_device pointer
5240  * @hard: force hard reset
5241  *
5242  * Look up which blocks are hung and attempt
5243  * to reset them.
5244  * Returns 0 for success.
5245  */
5246 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5247 {
5248         u32 reset_mask;
5249
5250         if (hard) {
5251                 cik_gpu_pci_config_reset(rdev);
5252                 return 0;
5253         }
5254
5255         reset_mask = cik_gpu_check_soft_reset(rdev);
5256
5257         if (reset_mask)
5258                 r600_set_bios_scratch_engine_hung(rdev, true);
5259
5260         /* try soft reset */
5261         cik_gpu_soft_reset(rdev, reset_mask);
5262
5263         reset_mask = cik_gpu_check_soft_reset(rdev);
5264
5265         /* try pci config reset */
5266         if (reset_mask && radeon_hard_reset)
5267                 cik_gpu_pci_config_reset(rdev);
5268
5269         reset_mask = cik_gpu_check_soft_reset(rdev);
5270
5271         if (!reset_mask)
5272                 r600_set_bios_scratch_engine_hung(rdev, false);
5273
5274         return 0;
5275 }
5276
5277 /**
5278  * cik_gfx_is_lockup - check if the 3D engine is locked up
5279  *
5280  * @rdev: radeon_device pointer
5281  * @ring: radeon_ring structure holding ring information
5282  *
5283  * Check if the 3D engine is locked up (CIK).
5284  * Returns true if the engine is locked, false if not.
5285  */
5286 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5287 {
5288         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5289
5290         if (!(reset_mask & (RADEON_RESET_GFX |
5291                             RADEON_RESET_COMPUTE |
5292                             RADEON_RESET_CP))) {
5293                 radeon_ring_lockup_update(rdev, ring);
5294                 return false;
5295         }
5296         return radeon_ring_test_lockup(rdev, ring);
5297 }
5298
/* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* stop memory clients while the apertures are reprogrammed */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end address in the upper 16 bits, start in the
	 * lower 16, both in units of 16MB (>> 24) */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* NOTE(review): presumably parks the unused AGP aperture - confirm
	 * this base/top/bot encoding against the register spec */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5355
5356 /**
5357  * cik_mc_init - initialize the memory controller driver params
5358  *
5359  * @rdev: radeon_device pointer
5360  *
5361  * Look up the amount of vram, vram width, and decide how to place
5362  * vram and gart within the GPU's physical address space (CIK).
5363  * Returns 0 for success.
5364  */
5365 static int cik_mc_init(struct radeon_device *rdev)
5366 {
5367         u32 tmp;
5368         int chansize, numchan;
5369
5370         /* Get VRAM informations */
5371         rdev->mc.vram_is_ddr = true;
5372         tmp = RREG32(MC_ARB_RAMCFG);
5373         if (tmp & CHANSIZE_MASK) {
5374                 chansize = 64;
5375         } else {
5376                 chansize = 32;
5377         }
5378         tmp = RREG32(MC_SHARED_CHMAP);
5379         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5380         case 0:
5381         default:
5382                 numchan = 1;
5383                 break;
5384         case 1:
5385                 numchan = 2;
5386                 break;
5387         case 2:
5388                 numchan = 4;
5389                 break;
5390         case 3:
5391                 numchan = 8;
5392                 break;
5393         case 4:
5394                 numchan = 3;
5395                 break;
5396         case 5:
5397                 numchan = 6;
5398                 break;
5399         case 6:
5400                 numchan = 10;
5401                 break;
5402         case 7:
5403                 numchan = 12;
5404                 break;
5405         case 8:
5406                 numchan = 16;
5407                 break;
5408         }
5409         rdev->mc.vram_width = numchan * chansize;
5410         /* Could aper size report 0 ? */
5411         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5412         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5413         /* size in MB on si */
5414         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5415         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5416         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5417         si_vram_gtt_location(rdev, &rdev->mc);
5418         radeon_update_bandwidth_info(rdev);
5419
5420         return 0;
5421 }
5422
/*
 * GART
 * VMID 0 is the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the radeon vm/hsa code.
 */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; 0x1 invalidates context 0 only */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5444
/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
 */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0: the kernel GART mapping; faults fall back to
	 * the dummy page */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): undocumented registers with no symbolic names -
	 * confirm their purpose against the register spec */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* restore context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
	for (i = 1; i < 16; i++) {
		/* contexts 0-7 and 8-15 sit in two separate register banks */
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	if (rdev->family == CHIP_KAVERI) {
		/* on Kaveri make sure the memory hub does not bypass the VM */
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		/* program per-VMID state through the SRBM */
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
5565
/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page table (CIK).
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* save the per-VMID page table bases so cik_pcie_gart_enable()
	 * can restore them after a reset/resume */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* release the pin taken by cik_pcie_gart_enable() */
	radeon_gart_table_vram_unpin(rdev);
}
5604
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK).
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	cik_pcie_gart_disable(rdev);
	/* free the page table backing store and core gart state */
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5618
/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Returns 0 (always succeeds).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
5632
5633 /*
5634  * vm
5635  * VMID 0 is the physical GPU addresses as used by the kernel.
5636  * VMIDs 1-15 are used for userspace clients and are handled
5637  * by the radeon vm/hsa code.
5638  */
5639 /**
5640  * cik_vm_init - cik vm init callback
5641  *
5642  * @rdev: radeon_device pointer
5643  *
5644  * Inits cik specific vm parameters (number of VMs, base of vram for
5645  * VMIDs 1-15) (CIK).
5646  * Returns 0 for success.
5647  */
5648 int cik_vm_init(struct radeon_device *rdev)
5649 {
5650         /*
5651          * number of VMs
5652          * VMID 0 is reserved for System
5653          * radeon graphics/compute will use VMIDs 1-15
5654          */
5655         rdev->vm_manager.nvm = 16;
5656         /* base offset of vram pages */
5657         if (rdev->flags & RADEON_IS_IGP) {
5658                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5659                 tmp <<= 22;
5660                 rdev->vm_manager.vram_base_offset = tmp;
5661         } else
5662                 rdev->vm_manager.vram_base_offset = 0;
5663
5664         return 0;
5665 }
5666
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* nothing to tear down on CIK */
}
5677
/**
 * cik_vm_decode_fault - print human readable fault info
 *
 * @rdev: radeon_device pointer
 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: memory client fault info; packs a four-character
 *             client block name, one byte per character
 *
 * Print human readable fault information (CIK).
 */
static void cik_vm_decode_fault(struct radeon_device *rdev,
				u32 status, u32 addr, u32 mc_client)
{
	u32 mc_id;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	/* unpack the four-character block name from mc_client */
	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };

	/* Hawaii uses a wider client-id field than the other CIK parts */
	if (rdev->family == CHIP_HAWAII)
		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	else
		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;

	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_client, mc_id);
}
5706
/**
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring to emit the flush packets on
 * @vm_id: VMID whose page table base is updated and TLB flushed
 * @pd_addr: new page directory base address
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).
 */
void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		  unsigned vm_id, uint64_t pd_addr)
{
	/* only the GFX ring has a PFP; compute rings write via the ME */
	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);

	/* write the new page directory base for this VMID */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	/* contexts 0-7 and 8-15 sit in two separate register banks */
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* update SH_MEM_* regs */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm_id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch the SRBM back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
5788
5789 /*
5790  * RLC
5791  * The RLC is a multi-purpose microengine that handles a
5792  * variety of functions, the most important of which is
5793  * the interrupt controller.
5794  */
5795 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5796                                           bool enable)
5797 {
5798         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5799
5800         if (enable)
5801                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5802         else
5803                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5804         WREG32(CP_INT_CNTL_RING0, tmp);
5805 }
5806
5807 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5808 {
5809         u32 tmp;
5810
5811         tmp = RREG32(RLC_LB_CNTL);
5812         if (enable)
5813                 tmp |= LOAD_BALANCE_ENABLE;
5814         else
5815                 tmp &= ~LOAD_BALANCE_ENABLE;
5816         WREG32(RLC_LB_CNTL, tmp);
5817 }
5818
/* Poll until the RLC serdes master units report idle.
 * Walks every shader-engine/shader-array pair waiting for the per-CU
 * masters, then waits for the non-CU (SE/GC/TC) masters; every poll is
 * bounded by rdev->usec_timeout.
 */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	/* wait for the per-CU serdes masters of each SE/SH to go idle */
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast to all SEs/SHs */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* now wait for the non-CU masters (SE/GC/TC0/TC1) */
	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
5843
5844 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5845 {
5846         u32 tmp;
5847
5848         tmp = RREG32(RLC_CNTL);
5849         if (tmp != rlc)
5850                 WREG32(RLC_CNTL, rlc);
5851 }
5852
/* Disable the RLC if it is running and wait for it to quiesce.
 * Returns the original RLC_CNTL value so the caller can restore it
 * with cik_update_rlc() once its serdes programming is done.
 */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* bounded wait for the RLC GPM to go idle */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
5876
/* Request RLC safe mode and wait (bounded) for the RLC to take the
 * request, so GFX power/clock state can be modified safely.
 */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	/* wait until both GFX power and clock status bits are set */
	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to clear (ack) the request bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
5897
5898 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5899 {
5900         u32 tmp;
5901
5902         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5903         WREG32(RLC_GPR_REG2, tmp);
5904 }
5905
/**
 * cik_rlc_stop - stop the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Halt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	/* clear RLC_ENABLE (and everything else) to halt the microengine */
	WREG32(RLC_CNTL, 0);

	cik_enable_gui_idle_interrupt(rdev, false);

	/* make sure all serdes masters are idle before returning */
	cik_wait_for_rlc_serdes(rdev);
}
5921
/**
 * cik_rlc_start - start the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Unhalt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);

	/* brief settle delay after enabling the RLC */
	udelay(50);
}
5937
/**
 * cik_rlc_resume - setup the RLC hw
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the RLC registers, load the ucode,
 * and start the RLC (CIK).
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_rlc_resume(struct radeon_device *rdev)
{
	u32 i, size, tmp;

	if (!rdev->rlc_fw)
		return -EINVAL;

	cik_rlc_stop(rdev);

	/* disable CG */
	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(RLC_CGCG_CGLS_CTRL, tmp);

	si_rlc_reset(rdev);

	cik_init_pg(rdev);

	cik_init_cg(rdev);

	/* load-balancing counters */
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_CNTR_MAX, 0x00008000);

	/* broadcast the LB setup to all SEs/SHs */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(RLC_LB_PARAMS, 0x00600408);
	WREG32(RLC_LB_CNTL, 0x80000004);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new-style firmware: size/offset come from the header,
		 * payload is little-endian */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		const __le32 *fw_data = (const __le32 *)
			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
	} else {
		/* legacy firmware: fixed per-family sizes, big-endian payload */
		const __be32 *fw_data;

		switch (rdev->family) {
		case CHIP_BONAIRE:
		case CHIP_HAWAII:
		default:
			size = BONAIRE_RLC_UCODE_SIZE;
			break;
		case CHIP_KAVERI:
			size = KV_RLC_UCODE_SIZE;
			break;
		case CHIP_KABINI:
			size = KB_RLC_UCODE_SIZE;
			break;
		case CHIP_MULLINS:
			size = ML_RLC_UCODE_SIZE;
			break;
		}

		fw_data = (const __be32 *)rdev->rlc_fw->data;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, 0);
	}

	/* XXX - find out what chips support lbpw */
	cik_enable_lbpw(rdev, false);

	if (rdev->family == CHIP_BONAIRE)
		WREG32(RLC_DRIVER_DMA_STATUS, 0);

	cik_rlc_start(rdev);

	return 0;
}
6027
/* Enable/disable GFX coarse-grain clock gating (CGCG) and coarse-grain
 * light sleep (CGLS) via the RLC serdes. */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* program the serdes with the RLC halted, then restore it */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* NOTE(review): results discarded; the back-to-back reads
		 * look like a deliberate settle/flush before disabling
		 * gating - confirm intent before changing */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6063
/* Enable/disable GFX medium-grain clock gating (MGCG) together with the
 * related CP/RLC memory light-sleep bits and CGTS overrides. */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* set bit 0, clear bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* program the serdes with the RLC halted, then restore it */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* take RLC memory out of light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		/* take CP memory out of light sleep */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* program the serdes with the RLC halted, then restore it */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}
6143
/* MC hub / ATC / VM registers carrying per-block clock-gating and
 * light-sleep enable bits; toggled as a set by cik_enable_mc_ls()
 * and cik_enable_mc_mgcg(). */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6156
6157 static void cik_enable_mc_ls(struct radeon_device *rdev,
6158                              bool enable)
6159 {
6160         int i;
6161         u32 orig, data;
6162
6163         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6164                 orig = data = RREG32(mc_cg_registers[i]);
6165                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6166                         data |= MC_LS_ENABLE;
6167                 else
6168                         data &= ~MC_LS_ENABLE;
6169                 if (data != orig)
6170                         WREG32(mc_cg_registers[i], data);
6171         }
6172 }
6173
6174 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6175                                bool enable)
6176 {
6177         int i;
6178         u32 orig, data;
6179
6180         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6181                 orig = data = RREG32(mc_cg_registers[i]);
6182                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6183                         data |= MC_CG_ENABLE;
6184                 else
6185                         data &= ~MC_CG_ENABLE;
6186                 if (data != orig)
6187                         WREG32(mc_cg_registers[i], data);
6188         }
6189 }
6190
6191 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6192                                  bool enable)
6193 {
6194         u32 orig, data;
6195
6196         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6197                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6198                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6199         } else {
6200                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6201                 data |= 0xff000000;
6202                 if (data != orig)
6203                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6204
6205                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6206                 data |= 0xff000000;
6207                 if (data != orig)
6208                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6209         }
6210 }
6211
6212 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6213                                  bool enable)
6214 {
6215         u32 orig, data;
6216
6217         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6218                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6219                 data |= 0x100;
6220                 if (orig != data)
6221                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6222
6223                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6224                 data |= 0x100;
6225                 if (orig != data)
6226                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6227         } else {
6228                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6229                 data &= ~0x100;
6230                 if (orig != data)
6231                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6232
6233                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6234                 data &= ~0x100;
6235                 if (orig != data)
6236                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6237         }
6238 }
6239
/* Enable/disable UVD medium-grain clock gating (memory gating bits plus
 * the DCM bit in UVD_CGC_CTRL). */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		/* NOTE(review): the value read here is immediately
		 * overwritten; presumably the read itself is wanted
		 * (or it is leftover) - confirm before changing */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6265
6266 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6267                                bool enable)
6268 {
6269         u32 orig, data;
6270
6271         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6272
6273         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6274                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6275                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6276         else
6277                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6278                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6279
6280         if (orig != data)
6281                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6282 }
6283
6284 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6285                                 bool enable)
6286 {
6287         u32 orig, data;
6288
6289         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6290
6291         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6292                 data &= ~CLOCK_GATING_DIS;
6293         else
6294                 data |= CLOCK_GATING_DIS;
6295
6296         if (orig != data)
6297                 WREG32(HDP_HOST_PATH_CNTL, data);
6298 }
6299
6300 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6301                               bool enable)
6302 {
6303         u32 orig, data;
6304
6305         orig = data = RREG32(HDP_MEM_POWER_LS);
6306
6307         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6308                 data |= HDP_LS_ENABLE;
6309         else
6310                 data &= ~HDP_LS_ENABLE;
6311
6312         if (orig != data)
6313                 WREG32(HDP_MEM_POWER_LS, data);
6314 }
6315
/* Enable/disable clock gating for the hardware blocks selected in
 * @block (a mask of RADEON_CG_BLOCK_* flags). */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		/* keep GUI idle interrupts off while reprogramming GFX CG */
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* skipped on IGPs */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6363
/* Enable clock gating on all supported blocks: GFX first, then the UVD
 * internal CG setup (shared with SI), then the remaining blocks. */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6378
/* Disable clock gating; mirror of cik_init_cg() with GFX done last. */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6389
6390 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6391                                           bool enable)
6392 {
6393         u32 data, orig;
6394
6395         orig = data = RREG32(RLC_PG_CNTL);
6396         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6397                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6398         else
6399                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6400         if (orig != data)
6401                 WREG32(RLC_PG_CNTL, data);
6402 }
6403
6404 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6405                                           bool enable)
6406 {
6407         u32 data, orig;
6408
6409         orig = data = RREG32(RLC_PG_CNTL);
6410         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6411                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6412         else
6413                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6414         if (orig != data)
6415                 WREG32(RLC_PG_CNTL, data);
6416 }
6417
6418 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6419 {
6420         u32 data, orig;
6421
6422         orig = data = RREG32(RLC_PG_CNTL);
6423         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6424                 data &= ~DISABLE_CP_PG;
6425         else
6426                 data |= DISABLE_CP_PG;
6427         if (orig != data)
6428                 WREG32(RLC_PG_CNTL, data);
6429 }
6430
6431 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6432 {
6433         u32 data, orig;
6434
6435         orig = data = RREG32(RLC_PG_CNTL);
6436         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6437                 data &= ~DISABLE_GDS_PG;
6438         else
6439                 data |= DISABLE_GDS_PG;
6440         if (orig != data)
6441                 WREG32(RLC_PG_CNTL, data);
6442 }
6443
/* Legacy (pre-new_fw) CP jump-table layout used by cik_init_cp_pg_table():
 * presumably 96 dwords per ME at a fixed offset inside each ucode image
 * (2048 dwords for CE/PFP/ME, 4096 for MEC) - confirm against ucode docs. */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096
6447
/* Copy the per-ME jump tables out of the CP ucode images into the RLC
 * cp_table buffer used for CP power gating.  ME slot order: 0=CE,
 * 1=PFP, 2=ME, 3=MEC, 4=MEC2 (KAVERI only).  No-op if the table
 * buffer was never mapped. */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (rdev->new_fw) {
			/* new-style firmware: per-image jump-table offset and
			 * size come from the gfx header */
			const __le32 *fw_data;
			const struct gfx_firmware_header_v1_0 *hdr;

			if (me == 0) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
				fw_data = (const __le32 *)
					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 1) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
				fw_data = (const __le32 *)
					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 2) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
				fw_data = (const __le32 *)
					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 3) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		} else {
			/* legacy firmware: fixed table size/offsets, payload
			 * is big-endian */
			const __be32 *fw_data;
			table_size = CP_ME_TABLE_SIZE;

			if (me == 0) {
				fw_data = (const __be32 *)rdev->ce_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 1) {
				fw_data = (const __be32 *)rdev->pfp_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 2) {
				fw_data = (const __be32 *)rdev->me_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else {
				fw_data = (const __be32 *)rdev->mec_fw->data;
				table_offset = CP_MEC_TABLE_OFFSET;
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		}
	}
}
6531
/* Enable/disable GFX power gating together with the RLC automatic
 * power-gating control. */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): result discarded; presumably the read
		 * itself is needed (e.g. to force completion) - confirm
		 * before removing */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6561
/* Return a bitmap of active CUs for the given shader engine/array,
 * limited to max_cu_per_sh bits.  The disabled-CU information is read
 * from the fused (CC) and user (GC_USER) shader array configs and
 * inverted. */
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
{
	u32 mask = 0, tmp, tmp1;
	int i;

	cik_select_se_sh(rdev, se, sh);
	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* only the upper 16 bits of the CC config are of interest */
	tmp &= 0xffff0000;

	tmp |= tmp1;
	tmp >>= 16;

	/* build a mask with max_cu_per_sh low bits set */
	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
		mask <<= 1;
		mask |= 1;
	}

	/* invert so that set bits = active CUs */
	return (~tmp) & mask;
}
6584
6585 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6586 {
6587         u32 i, j, k, active_cu_number = 0;
6588         u32 mask, counter, cu_bitmap;
6589         u32 tmp = 0;
6590
6591         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6592                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6593                         mask = 1;
6594                         cu_bitmap = 0;
6595                         counter = 0;
6596                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6597                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6598                                         if (counter < 2)
6599                                                 cu_bitmap |= mask;
6600                                         counter ++;
6601                                 }
6602                                 mask <<= 1;
6603                         }
6604
6605                         active_cu_number += counter;
6606                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6607                 }
6608         }
6609
6610         WREG32(RLC_PG_AO_CU_MASK, tmp);
6611
6612         tmp = RREG32(RLC_MAX_PG_CU);
6613         tmp &= ~MAX_PU_CU_MASK;
6614         tmp |= MAX_PU_CU(active_cu_number);
6615         WREG32(RLC_MAX_PG_CU, tmp);
6616 }
6617
6618 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6619                                        bool enable)
6620 {
6621         u32 data, orig;
6622
6623         orig = data = RREG32(RLC_PG_CNTL);
6624         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6625                 data |= STATIC_PER_CU_PG_ENABLE;
6626         else
6627                 data &= ~STATIC_PER_CU_PG_ENABLE;
6628         if (orig != data)
6629                 WREG32(RLC_PG_CNTL, data);
6630 }
6631
6632 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6633                                         bool enable)
6634 {
6635         u32 data, orig;
6636
6637         orig = data = RREG32(RLC_PG_CNTL);
6638         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6639                 data |= DYN_PER_CU_PG_ENABLE;
6640         else
6641                 data &= ~DYN_PER_CU_PG_ENABLE;
6642         if (orig != data)
6643                 WREG32(RLC_PG_CNTL, data);
6644 }
6645
6646 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6647 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6648
/**
 * cik_init_gfx_cgpg - initialize RLC state for gfx coarse grain powergating
 * @rdev: radeon_device pointer
 *
 * Points the RLC scratch area at the clear state descriptor and the
 * save/restore register list, selects the RLC as the gfx PG source, and
 * programs the save/restore base, CP table, idle poll count and the PG
 * delay/timer parameters.  The register write order follows the required
 * hardware programming sequence.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* write the clear state descriptor (addr hi/lo + size)
		 * into the RLC scratch area */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state buffer: zero all three descriptor dwords */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	/* let the RLC drive gfx powergating; write only on change */
	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* addresses are programmed in units of 256 bytes (>> 8) */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	/* only the low byte of RLC_PG_DELAY_2 is updated */
	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	/* GRBM register save-gate idle threshold */
	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6697
/**
 * cik_update_gfx_pg - toggle all gfx powergating features together
 * @rdev: radeon_device pointer
 * @enable: enable (true) or disable (false)
 *
 * Toggles coarse grain, static medium grain and dynamic medium grain
 * per-CU gfx powergating in that order; each helper checks the
 * corresponding pg_flags bit itself.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6704
6705 u32 cik_get_csb_size(struct radeon_device *rdev)
6706 {
6707         u32 count = 0;
6708         const struct cs_section_def *sect = NULL;
6709         const struct cs_extent_def *ext = NULL;
6710
6711         if (rdev->rlc.cs_data == NULL)
6712                 return 0;
6713
6714         /* begin clear state */
6715         count += 2;
6716         /* context control state */
6717         count += 3;
6718
6719         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6720                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6721                         if (sect->id == SECT_CONTEXT)
6722                                 count += 2 + ext->reg_count;
6723                         else
6724                                 return 0;
6725                 }
6726         }
6727         /* pa_sc_raster_config/pa_sc_raster_config1 */
6728         count += 4;
6729         /* end clear state */
6730         count += 2;
6731         /* clear state */
6732         count += 2;
6733
6734         return count;
6735 }
6736
6737 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6738 {
6739         u32 count = 0, i;
6740         const struct cs_section_def *sect = NULL;
6741         const struct cs_extent_def *ext = NULL;
6742
6743         if (rdev->rlc.cs_data == NULL)
6744                 return;
6745         if (buffer == NULL)
6746                 return;
6747
6748         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6749         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6750
6751         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6752         buffer[count++] = cpu_to_le32(0x80000000);
6753         buffer[count++] = cpu_to_le32(0x80000000);
6754
6755         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6756                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6757                         if (sect->id == SECT_CONTEXT) {
6758                                 buffer[count++] =
6759                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6760                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6761                                 for (i = 0; i < ext->reg_count; i++)
6762                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
6763                         } else {
6764                                 return;
6765                         }
6766                 }
6767         }
6768
6769         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6770         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6771         switch (rdev->family) {
6772         case CHIP_BONAIRE:
6773                 buffer[count++] = cpu_to_le32(0x16000012);
6774                 buffer[count++] = cpu_to_le32(0x00000000);
6775                 break;
6776         case CHIP_KAVERI:
6777                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6778                 buffer[count++] = cpu_to_le32(0x00000000);
6779                 break;
6780         case CHIP_KABINI:
6781         case CHIP_MULLINS:
6782                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6783                 buffer[count++] = cpu_to_le32(0x00000000);
6784                 break;
6785         case CHIP_HAWAII:
6786                 buffer[count++] = cpu_to_le32(0x3a00161a);
6787                 buffer[count++] = cpu_to_le32(0x0000002e);
6788                 break;
6789         default:
6790                 buffer[count++] = cpu_to_le32(0x00000000);
6791                 buffer[count++] = cpu_to_le32(0x00000000);
6792                 break;
6793         }
6794
6795         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6796         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6797
6798         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6799         buffer[count++] = cpu_to_le32(0);
6800 }
6801
/**
 * cik_init_pg - initialize powergating
 * @rdev: radeon_device pointer
 *
 * Enables the powergating features selected by rdev->pg_flags:
 * SCK slowdown on power up/down always, plus CP/GDS powergating and
 * gfx coarse grain setup when GFX_PG is supported, then programs the
 * always-on CU mask and turns gfx powergating on.  Does nothing when
 * no PG flags are set.
 */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
6816
/**
 * cik_fini_pg - tear down powergating
 * @rdev: radeon_device pointer
 *
 * Reverses cik_init_pg(): disables gfx powergating first, then CP and
 * GDS powergating when GFX_PG is supported.  Does nothing when no PG
 * flags are set.
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
6827
6828 /*
6829  * Interrupts
6830  * Starting with r6xx, interrupts are handled via a ring buffer.
6831  * Ring buffers are areas of GPU accessible memory that the GPU
6832  * writes interrupt vectors into and the host reads vectors out of.
6833  * There is a rptr (read pointer) that determines where the
6834  * host is currently reading, and a wptr (write pointer)
6835  * which determines where the GPU has written.  When the
6836  * pointers are equal, the ring is idle.  When the GPU
6837  * writes vectors to the ring buffer, it increments the
6838  * wptr.  When there is an interrupt, the host then starts
6839  * fetching commands and processing them until the pointers are
6840  * equal again at which point it updates the rptr.
6841  */
6842
6843 /**
6844  * cik_enable_interrupts - Enable the interrupt ring buffer
6845  *
6846  * @rdev: radeon_device pointer
6847  *
6848  * Enable the interrupt ring buffer (CIK).
6849  */
6850 static void cik_enable_interrupts(struct radeon_device *rdev)
6851 {
6852         u32 ih_cntl = RREG32(IH_CNTL);
6853         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6854
6855         ih_cntl |= ENABLE_INTR;
6856         ih_rb_cntl |= IH_RB_ENABLE;
6857         WREG32(IH_CNTL, ih_cntl);
6858         WREG32(IH_RB_CNTL, ih_rb_cntl);
6859         rdev->ih.enabled = true;
6860 }
6861
6862 /**
6863  * cik_disable_interrupts - Disable the interrupt ring buffer
6864  *
6865  * @rdev: radeon_device pointer
6866  *
6867  * Disable the interrupt ring buffer (CIK).
6868  */
6869 static void cik_disable_interrupts(struct radeon_device *rdev)
6870 {
6871         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6872         u32 ih_cntl = RREG32(IH_CNTL);
6873
6874         ih_rb_cntl &= ~IH_RB_ENABLE;
6875         ih_cntl &= ~ENABLE_INTR;
6876         WREG32(IH_RB_CNTL, ih_rb_cntl);
6877         WREG32(IH_CNTL, ih_cntl);
6878         /* set rptr, wptr to 0 */
6879         WREG32(IH_RB_RPTR, 0);
6880         WREG32(IH_RB_WPTR, 0);
6881         rdev->ih.enabled = false;
6882         rdev->ih.rptr = 0;
6883 }
6884
6885 /**
6886  * cik_disable_interrupt_state - Disable all interrupt sources
6887  *
6888  * @rdev: radeon_device pointer
6889  *
6890  * Clear all interrupt enable bits used by the driver (CIK).
6891  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring */
	/* keep only the context busy/empty bits, mask everything else */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* SRBM */
	WREG32(SRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug */
	/* preserve the sense polarity bit while masking the interrupt */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
6961
6962 /**
6963  * cik_irq_init - init and enable the interrupt ring
6964  *
6965  * @rdev: radeon_device pointer
6966  *
6967  * Allocate a ring buffer for the interrupt controller,
6968  * enable the RLC, disable interrupts, enable the IH
6969  * ring buffer and enable it (CIK).
 * Called at device load and resume.
6971  * Returns 0 for success, errors for failure.
6972  */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		/* RLC failed to come up; release the IH ring again */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	/* ring base is in units of 256 bytes */
	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the ring size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
7043
7044 /**
7045  * cik_irq_set - enable/disable interrupt sources
7046  *
7047  * @rdev: radeon_device pointer
7048  *
7049  * Enable interrupt sources on the GPU (vblanks, hpd,
7050  * etc.) (CIK).
7051  * Returns 0 for success, errors for failure.
7052  */
7053 int cik_irq_set(struct radeon_device *rdev)
7054 {
7055         u32 cp_int_cntl;
7056         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
7057         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
7058         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7059         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7060         u32 grbm_int_cntl = 0;
7061         u32 dma_cntl, dma_cntl1;
7062
7063         if (!rdev->irq.installed) {
7064                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7065                 return -EINVAL;
7066         }
7067         /* don't enable anything if the ih is disabled */
7068         if (!rdev->ih.enabled) {
7069                 cik_disable_interrupts(rdev);
7070                 /* force the active interrupt state to all disabled */
7071                 cik_disable_interrupt_state(rdev);
7072                 return 0;
7073         }
7074
7075         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7076                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7077         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7078
7079         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7080         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7081         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7082         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7083         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7084         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7085
7086         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7087         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7088
7089         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7090         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7091         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7092         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7093         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7094         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7095         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7096         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7097
7098         /* enable CP interrupts on all rings */
7099         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7100                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7101                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7102         }
7103         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7104                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7105                 DRM_DEBUG("si_irq_set: sw int cp1\n");
7106                 if (ring->me == 1) {
7107                         switch (ring->pipe) {
7108                         case 0:
7109                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7110                                 break;
7111                         case 1:
7112                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7113                                 break;
7114                         case 2:
7115                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7116                                 break;
7117                         case 3:
7118                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7119                                 break;
7120                         default:
7121                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7122                                 break;
7123                         }
7124                 } else if (ring->me == 2) {
7125                         switch (ring->pipe) {
7126                         case 0:
7127                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7128                                 break;
7129                         case 1:
7130                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7131                                 break;
7132                         case 2:
7133                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7134                                 break;
7135                         case 3:
7136                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7137                                 break;
7138                         default:
7139                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7140                                 break;
7141                         }
7142                 } else {
7143                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7144                 }
7145         }
7146         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7147                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7148                 DRM_DEBUG("si_irq_set: sw int cp2\n");
7149                 if (ring->me == 1) {
7150                         switch (ring->pipe) {
7151                         case 0:
7152                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7153                                 break;
7154                         case 1:
7155                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7156                                 break;
7157                         case 2:
7158                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7159                                 break;
7160                         case 3:
7161                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7162                                 break;
7163                         default:
7164                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7165                                 break;
7166                         }
7167                 } else if (ring->me == 2) {
7168                         switch (ring->pipe) {
7169                         case 0:
7170                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7171                                 break;
7172                         case 1:
7173                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7174                                 break;
7175                         case 2:
7176                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7177                                 break;
7178                         case 3:
7179                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7180                                 break;
7181                         default:
7182                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7183                                 break;
7184                         }
7185                 } else {
7186                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7187                 }
7188         }
7189
7190         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7191                 DRM_DEBUG("cik_irq_set: sw int dma\n");
7192                 dma_cntl |= TRAP_ENABLE;
7193         }
7194
7195         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7196                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7197                 dma_cntl1 |= TRAP_ENABLE;
7198         }
7199
7200         if (rdev->irq.crtc_vblank_int[0] ||
7201             atomic_read(&rdev->irq.pflip[0])) {
7202                 DRM_DEBUG("cik_irq_set: vblank 0\n");
7203                 crtc1 |= VBLANK_INTERRUPT_MASK;
7204         }
7205         if (rdev->irq.crtc_vblank_int[1] ||
7206             atomic_read(&rdev->irq.pflip[1])) {
7207                 DRM_DEBUG("cik_irq_set: vblank 1\n");
7208                 crtc2 |= VBLANK_INTERRUPT_MASK;
7209         }
7210         if (rdev->irq.crtc_vblank_int[2] ||
7211             atomic_read(&rdev->irq.pflip[2])) {
7212                 DRM_DEBUG("cik_irq_set: vblank 2\n");
7213                 crtc3 |= VBLANK_INTERRUPT_MASK;
7214         }
7215         if (rdev->irq.crtc_vblank_int[3] ||
7216             atomic_read(&rdev->irq.pflip[3])) {
7217                 DRM_DEBUG("cik_irq_set: vblank 3\n");
7218                 crtc4 |= VBLANK_INTERRUPT_MASK;
7219         }
7220         if (rdev->irq.crtc_vblank_int[4] ||
7221             atomic_read(&rdev->irq.pflip[4])) {
7222                 DRM_DEBUG("cik_irq_set: vblank 4\n");
7223                 crtc5 |= VBLANK_INTERRUPT_MASK;
7224         }
7225         if (rdev->irq.crtc_vblank_int[5] ||
7226             atomic_read(&rdev->irq.pflip[5])) {
7227                 DRM_DEBUG("cik_irq_set: vblank 5\n");
7228                 crtc6 |= VBLANK_INTERRUPT_MASK;
7229         }
7230         if (rdev->irq.hpd[0]) {
7231                 DRM_DEBUG("cik_irq_set: hpd 1\n");
7232                 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7233         }
7234         if (rdev->irq.hpd[1]) {
7235                 DRM_DEBUG("cik_irq_set: hpd 2\n");
7236                 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7237         }
7238         if (rdev->irq.hpd[2]) {
7239                 DRM_DEBUG("cik_irq_set: hpd 3\n");
7240                 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7241         }
7242         if (rdev->irq.hpd[3]) {
7243                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7244                 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7245         }
7246         if (rdev->irq.hpd[4]) {
7247                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7248                 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7249         }
7250         if (rdev->irq.hpd[5]) {
7251                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7252                 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7253         }
7254
7255         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7256
7257         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7258         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7259
7260         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7261         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7262         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7263         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7264         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7265         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7266         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7267         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7268
7269         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7270
7271         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7272         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7273         if (rdev->num_crtc >= 4) {
7274                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7275                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7276         }
7277         if (rdev->num_crtc >= 6) {
7278                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7279                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7280         }
7281
7282         if (rdev->num_crtc >= 2) {
7283                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7284                        GRPH_PFLIP_INT_MASK);
7285                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7286                        GRPH_PFLIP_INT_MASK);
7287         }
7288         if (rdev->num_crtc >= 4) {
7289                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7290                        GRPH_PFLIP_INT_MASK);
7291                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7292                        GRPH_PFLIP_INT_MASK);
7293         }
7294         if (rdev->num_crtc >= 6) {
7295                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7296                        GRPH_PFLIP_INT_MASK);
7297                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7298                        GRPH_PFLIP_INT_MASK);
7299         }
7300
7301         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7302         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7303         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7304         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7305         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7306         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7307
7308         /* posting read */
7309         RREG32(SRBM_STATUS);
7310
7311         return 0;
7312 }
7313
7314 /**
7315  * cik_irq_ack - ack interrupt sources
7316  *
7317  * @rdev: radeon_device pointer
7318  *
7319  * Ack interrupt sources on the GPU (vblanks, hpd,
7320  * etc.) (CIK).  Certain interrupts sources are sw
7321  * generated and do not require an explicit ack.
7322  */
7323 static inline void cik_irq_ack(struct radeon_device *rdev)
7324 {
7325         u32 tmp;
7326
7327         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7328         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7329         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7330         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7331         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7332         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7333         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7334
7335         rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7336                 EVERGREEN_CRTC0_REGISTER_OFFSET);
7337         rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7338                 EVERGREEN_CRTC1_REGISTER_OFFSET);
7339         if (rdev->num_crtc >= 4) {
7340                 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7341                         EVERGREEN_CRTC2_REGISTER_OFFSET);
7342                 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7343                         EVERGREEN_CRTC3_REGISTER_OFFSET);
7344         }
7345         if (rdev->num_crtc >= 6) {
7346                 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7347                         EVERGREEN_CRTC4_REGISTER_OFFSET);
7348                 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7349                         EVERGREEN_CRTC5_REGISTER_OFFSET);
7350         }
7351
7352         if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7353                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7354                        GRPH_PFLIP_INT_CLEAR);
7355         if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7356                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7357                        GRPH_PFLIP_INT_CLEAR);
7358         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7359                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7360         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7361                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7362         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7363                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7364         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7365                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7366
7367         if (rdev->num_crtc >= 4) {
7368                 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7369                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7370                                GRPH_PFLIP_INT_CLEAR);
7371                 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7372                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7373                                GRPH_PFLIP_INT_CLEAR);
7374                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7375                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7376                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7377                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7378                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7379                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7380                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7381                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7382         }
7383
7384         if (rdev->num_crtc >= 6) {
7385                 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7386                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7387                                GRPH_PFLIP_INT_CLEAR);
7388                 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7389                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7390                                GRPH_PFLIP_INT_CLEAR);
7391                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7392                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7393                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7394                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7395                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7396                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7397                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7398                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7399         }
7400
7401         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7402                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7403                 tmp |= DC_HPDx_INT_ACK;
7404                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7405         }
7406         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7407                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7408                 tmp |= DC_HPDx_INT_ACK;
7409                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7410         }
7411         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7412                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7413                 tmp |= DC_HPDx_INT_ACK;
7414                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7415         }
7416         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7417                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7418                 tmp |= DC_HPDx_INT_ACK;
7419                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7420         }
7421         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7422                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7423                 tmp |= DC_HPDx_INT_ACK;
7424                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7425         }
7426         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7427                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7428                 tmp |= DC_HPDx_INT_ACK;
7429                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7430         }
7431         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7432                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7433                 tmp |= DC_HPDx_RX_INT_ACK;
7434                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7435         }
7436         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7437                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7438                 tmp |= DC_HPDx_RX_INT_ACK;
7439                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7440         }
7441         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7442                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7443                 tmp |= DC_HPDx_RX_INT_ACK;
7444                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7445         }
7446         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7447                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7448                 tmp |= DC_HPDx_RX_INT_ACK;
7449                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7450         }
7451         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7452                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7453                 tmp |= DC_HPDx_RX_INT_ACK;
7454                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7455         }
7456         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7457                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7458                 tmp |= DC_HPDx_RX_INT_ACK;
7459                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7460         }
7461 }
7462
7463 /**
7464  * cik_irq_disable - disable interrupts
7465  *
7466  * @rdev: radeon_device pointer
7467  *
7468  * Disable interrupts on the hw (CIK).
7469  */
7470 static void cik_irq_disable(struct radeon_device *rdev)
7471 {
7472         cik_disable_interrupts(rdev);
7473         /* Wait and acknowledge irq */
7474         mdelay(1);
7475         cik_irq_ack(rdev);
7476         cik_disable_interrupt_state(rdev);
7477 }
7478
7479 /**
7480  * cik_irq_disable - disable interrupts for suspend
7481  *
7482  * @rdev: radeon_device pointer
7483  *
7484  * Disable interrupts and stop the RLC (CIK).
7485  * Used for suspend.
7486  */
7487 static void cik_irq_suspend(struct radeon_device *rdev)
7488 {
7489         cik_irq_disable(rdev);
7490         cik_rlc_stop(rdev);
7491 }
7492
7493 /**
7494  * cik_irq_fini - tear down interrupt support
7495  *
7496  * @rdev: radeon_device pointer
7497  *
7498  * Disable interrupts on the hw and free the IH ring
7499  * buffer (CIK).
7500  * Used for driver unload.
7501  */
7502 static void cik_irq_fini(struct radeon_device *rdev)
7503 {
7504         cik_irq_suspend(rdev);
7505         r600_ih_ring_fini(rdev);
7506 }
7507
7508 /**
7509  * cik_get_ih_wptr - get the IH ring buffer wptr
7510  *
7511  * @rdev: radeon_device pointer
7512  *
7513  * Get the IH ring buffer wptr from either the register
7514  * or the writeback memory buffer (CIK).  Also check for
7515  * ring buffer overflow and deal with it.
7516  * Used by cik_irq_process().
7517  * Returns the value of the wptr.
7518  */
7519 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7520 {
7521         u32 wptr, tmp;
7522
7523         if (rdev->wb.enabled)
7524                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7525         else
7526                 wptr = RREG32(IH_RB_WPTR);
7527
7528         if (wptr & RB_OVERFLOW) {
7529                 wptr &= ~RB_OVERFLOW;
7530                 /* When a ring buffer overflow happen start parsing interrupt
7531                  * from the last not overwritten vector (wptr + 16). Hopefully
7532                  * this should allow us to catchup.
7533                  */
7534                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7535                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7536                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7537                 tmp = RREG32(IH_RB_CNTL);
7538                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7539                 WREG32(IH_RB_CNTL, tmp);
7540         }
7541         return (wptr & rdev->ih.ptr_mask);
7542 }
7543
7544 /*        CIK IV Ring
7545  * Each IV ring entry is 128 bits:
7546  * [7:0]    - interrupt source id
7547  * [31:8]   - reserved
7548  * [59:32]  - interrupt source data
7549  * [63:60]  - reserved
7550  * [71:64]  - RINGID
7551  *            CP:
7552  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7553  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7554  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7555  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7556  *            PIPE_ID - ME0 0=3D
7557  *                    - ME1&2 compute dispatcher (4 pipes each)
7558  *            SDMA:
7559  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7560  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7561  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7562  * [79:72]  - VMID
7563  * [95:80]  - PASID
7564  * [127:96] - reserved
7565  */
7566 /**
7567  * cik_irq_process - interrupt handler
7568  *
7569  * @rdev: radeon_device pointer
7570  *
 * Interrupt handler (CIK).  Walk the IH ring,
7572  * ack interrupts and schedule work to handle
7573  * interrupt events.
7574  * Returns irq process return code.
7575  */
7576 int cik_irq_process(struct radeon_device *rdev)
7577 {
7578         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7579         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7580         u32 wptr;
7581         u32 rptr;
7582         u32 src_id, src_data, ring_id;
7583         u8 me_id, pipe_id, queue_id;
7584         u32 ring_index;
7585         bool queue_hotplug = false;
7586         bool queue_dp = false;
7587         bool queue_reset = false;
7588         u32 addr, status, mc_client;
7589         bool queue_thermal = false;
7590
7591         if (!rdev->ih.enabled || rdev->shutdown)
7592                 return IRQ_NONE;
7593
7594         wptr = cik_get_ih_wptr(rdev);
7595
7596 restart_ih:
7597         /* is somebody else already processing irqs? */
7598         if (atomic_xchg(&rdev->ih.lock, 1))
7599                 return IRQ_NONE;
7600
7601         rptr = rdev->ih.rptr;
7602         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7603
7604         /* Order reading of wptr vs. reading of IH ring data */
7605         rmb();
7606
7607         /* display interrupts */
7608         cik_irq_ack(rdev);
7609
7610         while (rptr != wptr) {
7611                 /* wptr/rptr are in bytes! */
7612                 ring_index = rptr / 4;
7613
7614                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7615                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7616                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7617
7618                 switch (src_id) {
7619                 case 1: /* D1 vblank/vline */
7620                         switch (src_data) {
7621                         case 0: /* D1 vblank */
7622                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7623                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7624
7625                                 if (rdev->irq.crtc_vblank_int[0]) {
7626                                         drm_handle_vblank(rdev->ddev, 0);
7627                                         rdev->pm.vblank_sync = true;
7628                                         wake_up(&rdev->irq.vblank_queue);
7629                                 }
7630                                 if (atomic_read(&rdev->irq.pflip[0]))
7631                                         radeon_crtc_handle_vblank(rdev, 0);
7632                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7633                                 DRM_DEBUG("IH: D1 vblank\n");
7634
7635                                 break;
7636                         case 1: /* D1 vline */
7637                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7638                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7639
7640                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7641                                 DRM_DEBUG("IH: D1 vline\n");
7642
7643                                 break;
7644                         default:
7645                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7646                                 break;
7647                         }
7648                         break;
7649                 case 2: /* D2 vblank/vline */
7650                         switch (src_data) {
7651                         case 0: /* D2 vblank */
7652                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7653                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7654
7655                                 if (rdev->irq.crtc_vblank_int[1]) {
7656                                         drm_handle_vblank(rdev->ddev, 1);
7657                                         rdev->pm.vblank_sync = true;
7658                                         wake_up(&rdev->irq.vblank_queue);
7659                                 }
7660                                 if (atomic_read(&rdev->irq.pflip[1]))
7661                                         radeon_crtc_handle_vblank(rdev, 1);
7662                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7663                                 DRM_DEBUG("IH: D2 vblank\n");
7664
7665                                 break;
7666                         case 1: /* D2 vline */
7667                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7668                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7669
7670                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7671                                 DRM_DEBUG("IH: D2 vline\n");
7672
7673                                 break;
7674                         default:
7675                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7676                                 break;
7677                         }
7678                         break;
7679                 case 3: /* D3 vblank/vline */
7680                         switch (src_data) {
7681                         case 0: /* D3 vblank */
7682                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7683                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7684
7685                                 if (rdev->irq.crtc_vblank_int[2]) {
7686                                         drm_handle_vblank(rdev->ddev, 2);
7687                                         rdev->pm.vblank_sync = true;
7688                                         wake_up(&rdev->irq.vblank_queue);
7689                                 }
7690                                 if (atomic_read(&rdev->irq.pflip[2]))
7691                                         radeon_crtc_handle_vblank(rdev, 2);
7692                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7693                                 DRM_DEBUG("IH: D3 vblank\n");
7694
7695                                 break;
7696                         case 1: /* D3 vline */
7697                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7698                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7699
7700                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7701                                 DRM_DEBUG("IH: D3 vline\n");
7702
7703                                 break;
7704                         default:
7705                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7706                                 break;
7707                         }
7708                         break;
7709                 case 4: /* D4 vblank/vline */
7710                         switch (src_data) {
7711                         case 0: /* D4 vblank */
7712                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7713                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7714
7715                                 if (rdev->irq.crtc_vblank_int[3]) {
7716                                         drm_handle_vblank(rdev->ddev, 3);
7717                                         rdev->pm.vblank_sync = true;
7718                                         wake_up(&rdev->irq.vblank_queue);
7719                                 }
7720                                 if (atomic_read(&rdev->irq.pflip[3]))
7721                                         radeon_crtc_handle_vblank(rdev, 3);
7722                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7723                                 DRM_DEBUG("IH: D4 vblank\n");
7724
7725                                 break;
7726                         case 1: /* D4 vline */
7727                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7728                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7729
7730                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7731                                 DRM_DEBUG("IH: D4 vline\n");
7732
7733                                 break;
7734                         default:
7735                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7736                                 break;
7737                         }
7738                         break;
7739                 case 5: /* D5 vblank/vline */
7740                         switch (src_data) {
7741                         case 0: /* D5 vblank */
7742                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7743                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7744
7745                                 if (rdev->irq.crtc_vblank_int[4]) {
7746                                         drm_handle_vblank(rdev->ddev, 4);
7747                                         rdev->pm.vblank_sync = true;
7748                                         wake_up(&rdev->irq.vblank_queue);
7749                                 }
7750                                 if (atomic_read(&rdev->irq.pflip[4]))
7751                                         radeon_crtc_handle_vblank(rdev, 4);
7752                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7753                                 DRM_DEBUG("IH: D5 vblank\n");
7754
7755                                 break;
7756                         case 1: /* D5 vline */
7757                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7758                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7759
7760                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7761                                 DRM_DEBUG("IH: D5 vline\n");
7762
7763                                 break;
7764                         default:
7765                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7766                                 break;
7767                         }
7768                         break;
7769                 case 6: /* D6 vblank/vline */
7770                         switch (src_data) {
7771                         case 0: /* D6 vblank */
7772                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7773                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7774
7775                                 if (rdev->irq.crtc_vblank_int[5]) {
7776                                         drm_handle_vblank(rdev->ddev, 5);
7777                                         rdev->pm.vblank_sync = true;
7778                                         wake_up(&rdev->irq.vblank_queue);
7779                                 }
7780                                 if (atomic_read(&rdev->irq.pflip[5]))
7781                                         radeon_crtc_handle_vblank(rdev, 5);
7782                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7783                                 DRM_DEBUG("IH: D6 vblank\n");
7784
7785                                 break;
7786                         case 1: /* D6 vline */
7787                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7788                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7789
7790                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7791                                 DRM_DEBUG("IH: D6 vline\n");
7792
7793                                 break;
7794                         default:
7795                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7796                                 break;
7797                         }
7798                         break;
7799                 case 8: /* D1 page flip */
7800                 case 10: /* D2 page flip */
7801                 case 12: /* D3 page flip */
7802                 case 14: /* D4 page flip */
7803                 case 16: /* D5 page flip */
7804                 case 18: /* D6 page flip */
7805                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7806                         if (radeon_use_pflipirq > 0)
7807                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7808                         break;
7809                 case 42: /* HPD hotplug */
7810                         switch (src_data) {
7811                         case 0:
7812                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7813                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7814
7815                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7816                                 queue_hotplug = true;
7817                                 DRM_DEBUG("IH: HPD1\n");
7818
7819                                 break;
7820                         case 1:
7821                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7822                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7823
7824                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7825                                 queue_hotplug = true;
7826                                 DRM_DEBUG("IH: HPD2\n");
7827
7828                                 break;
7829                         case 2:
7830                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7831                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7832
7833                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7834                                 queue_hotplug = true;
7835                                 DRM_DEBUG("IH: HPD3\n");
7836
7837                                 break;
7838                         case 3:
7839                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7840                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7841
7842                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7843                                 queue_hotplug = true;
7844                                 DRM_DEBUG("IH: HPD4\n");
7845
7846                                 break;
7847                         case 4:
7848                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7849                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7850
7851                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7852                                 queue_hotplug = true;
7853                                 DRM_DEBUG("IH: HPD5\n");
7854
7855                                 break;
7856                         case 5:
7857                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7858                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7859
7860                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7861                                 queue_hotplug = true;
7862                                 DRM_DEBUG("IH: HPD6\n");
7863
7864                                 break;
7865                         case 6:
7866                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7867                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7868
7869                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7870                                 queue_dp = true;
7871                                 DRM_DEBUG("IH: HPD_RX 1\n");
7872
7873                                 break;
7874                         case 7:
7875                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7876                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7877
7878                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7879                                 queue_dp = true;
7880                                 DRM_DEBUG("IH: HPD_RX 2\n");
7881
7882                                 break;
7883                         case 8:
7884                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7885                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7886
7887                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7888                                 queue_dp = true;
7889                                 DRM_DEBUG("IH: HPD_RX 3\n");
7890
7891                                 break;
7892                         case 9:
7893                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7894                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7895
7896                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7897                                 queue_dp = true;
7898                                 DRM_DEBUG("IH: HPD_RX 4\n");
7899
7900                                 break;
7901                         case 10:
7902                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7903                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7904
7905                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7906                                 queue_dp = true;
7907                                 DRM_DEBUG("IH: HPD_RX 5\n");
7908
7909                                 break;
7910                         case 11:
7911                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7912                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7913
7914                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7915                                 queue_dp = true;
7916                                 DRM_DEBUG("IH: HPD_RX 6\n");
7917
7918                                 break;
7919                         default:
7920                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7921                                 break;
7922                         }
7923                         break;
7924                 case 96:
7925                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7926                         WREG32(SRBM_INT_ACK, 0x1);
7927                         break;
7928                 case 124: /* UVD */
7929                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7930                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7931                         break;
7932                 case 146:
7933                 case 147:
7934                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7935                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7936                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7937                         /* reset addr and status */
7938                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7939                         if (addr == 0x0 && status == 0x0)
7940                                 break;
7941                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7942                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7943                                 addr);
7944                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7945                                 status);
7946                         cik_vm_decode_fault(rdev, status, addr, mc_client);
7947                         break;
7948                 case 167: /* VCE */
7949                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7950                         switch (src_data) {
7951                         case 0:
7952                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7953                                 break;
7954                         case 1:
7955                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7956                                 break;
7957                         default:
7958                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7959                                 break;
7960                         }
7961                         break;
7962                 case 176: /* GFX RB CP_INT */
7963                 case 177: /* GFX IB CP_INT */
7964                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7965                         break;
7966                 case 181: /* CP EOP event */
7967                         DRM_DEBUG("IH: CP EOP\n");
7968                         /* XXX check the bitfield order! */
7969                         me_id = (ring_id & 0x60) >> 5;
7970                         pipe_id = (ring_id & 0x18) >> 3;
7971                         queue_id = (ring_id & 0x7) >> 0;
7972                         switch (me_id) {
7973                         case 0:
7974                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7975                                 break;
7976                         case 1:
7977                         case 2:
7978                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7979                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7980                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7981                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7982                                 break;
7983                         }
7984                         break;
7985                 case 184: /* CP Privileged reg access */
7986                         DRM_ERROR("Illegal register access in command stream\n");
7987                         /* XXX check the bitfield order! */
7988                         me_id = (ring_id & 0x60) >> 5;
7989                         pipe_id = (ring_id & 0x18) >> 3;
7990                         queue_id = (ring_id & 0x7) >> 0;
7991                         switch (me_id) {
7992                         case 0:
7993                                 /* This results in a full GPU reset, but all we need to do is soft
7994                                  * reset the CP for gfx
7995                                  */
7996                                 queue_reset = true;
7997                                 break;
7998                         case 1:
7999                                 /* XXX compute */
8000                                 queue_reset = true;
8001                                 break;
8002                         case 2:
8003                                 /* XXX compute */
8004                                 queue_reset = true;
8005                                 break;
8006                         }
8007                         break;
8008                 case 185: /* CP Privileged inst */
8009                         DRM_ERROR("Illegal instruction in command stream\n");
8010                         /* XXX check the bitfield order! */
8011                         me_id = (ring_id & 0x60) >> 5;
8012                         pipe_id = (ring_id & 0x18) >> 3;
8013                         queue_id = (ring_id & 0x7) >> 0;
8014                         switch (me_id) {
8015                         case 0:
8016                                 /* This results in a full GPU reset, but all we need to do is soft
8017                                  * reset the CP for gfx
8018                                  */
8019                                 queue_reset = true;
8020                                 break;
8021                         case 1:
8022                                 /* XXX compute */
8023                                 queue_reset = true;
8024                                 break;
8025                         case 2:
8026                                 /* XXX compute */
8027                                 queue_reset = true;
8028                                 break;
8029                         }
8030                         break;
8031                 case 224: /* SDMA trap event */
8032                         /* XXX check the bitfield order! */
8033                         me_id = (ring_id & 0x3) >> 0;
8034                         queue_id = (ring_id & 0xc) >> 2;
8035                         DRM_DEBUG("IH: SDMA trap\n");
8036                         switch (me_id) {
8037                         case 0:
8038                                 switch (queue_id) {
8039                                 case 0:
8040                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8041                                         break;
8042                                 case 1:
8043                                         /* XXX compute */
8044                                         break;
8045                                 case 2:
8046                                         /* XXX compute */
8047                                         break;
8048                                 }
8049                                 break;
8050                         case 1:
8051                                 switch (queue_id) {
8052                                 case 0:
8053                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8054                                         break;
8055                                 case 1:
8056                                         /* XXX compute */
8057                                         break;
8058                                 case 2:
8059                                         /* XXX compute */
8060                                         break;
8061                                 }
8062                                 break;
8063                         }
8064                         break;
8065                 case 230: /* thermal low to high */
8066                         DRM_DEBUG("IH: thermal low to high\n");
8067                         rdev->pm.dpm.thermal.high_to_low = false;
8068                         queue_thermal = true;
8069                         break;
8070                 case 231: /* thermal high to low */
8071                         DRM_DEBUG("IH: thermal high to low\n");
8072                         rdev->pm.dpm.thermal.high_to_low = true;
8073                         queue_thermal = true;
8074                         break;
8075                 case 233: /* GUI IDLE */
8076                         DRM_DEBUG("IH: GUI idle\n");
8077                         break;
8078                 case 241: /* SDMA Privileged inst */
8079                 case 247: /* SDMA Privileged inst */
8080                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
8081                         /* XXX check the bitfield order! */
8082                         me_id = (ring_id & 0x3) >> 0;
8083                         queue_id = (ring_id & 0xc) >> 2;
8084                         switch (me_id) {
8085                         case 0:
8086                                 switch (queue_id) {
8087                                 case 0:
8088                                         queue_reset = true;
8089                                         break;
8090                                 case 1:
8091                                         /* XXX compute */
8092                                         queue_reset = true;
8093                                         break;
8094                                 case 2:
8095                                         /* XXX compute */
8096                                         queue_reset = true;
8097                                         break;
8098                                 }
8099                                 break;
8100                         case 1:
8101                                 switch (queue_id) {
8102                                 case 0:
8103                                         queue_reset = true;
8104                                         break;
8105                                 case 1:
8106                                         /* XXX compute */
8107                                         queue_reset = true;
8108                                         break;
8109                                 case 2:
8110                                         /* XXX compute */
8111                                         queue_reset = true;
8112                                         break;
8113                                 }
8114                                 break;
8115                         }
8116                         break;
8117                 default:
8118                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8119                         break;
8120                 }
8121
8122                 /* wptr/rptr are in bytes! */
8123                 rptr += 16;
8124                 rptr &= rdev->ih.ptr_mask;
8125                 WREG32(IH_RB_RPTR, rptr);
8126         }
8127         if (queue_dp)
8128                 schedule_work(&rdev->dp_work);
8129         if (queue_hotplug)
8130                 schedule_delayed_work(&rdev->hotplug_work, 0);
8131         if (queue_reset) {
8132                 rdev->needs_reset = true;
8133                 wake_up_all(&rdev->fence_queue);
8134         }
8135         if (queue_thermal)
8136                 schedule_work(&rdev->pm.dpm.thermal.work);
8137         rdev->ih.rptr = rptr;
8138         atomic_set(&rdev->ih.lock, 0);
8139
8140         /* make sure wptr hasn't changed while processing */
8141         wptr = cik_get_ih_wptr(rdev);
8142         if (wptr != rptr)
8143                 goto restart_ih;
8144
8145         return IRQ_HANDLED;
8146 }
8147
8148 /*
8149  * startup/shutdown callbacks
8150  */
/**
 * cik_uvd_init - driver-side UVD setup
 *
 * @rdev: radeon_device pointer
 *
 * Initializes the UVD software state and sets up the UVD ring
 * structure.  If radeon_uvd_init() fails, UVD support is disabled
 * (rdev->has_uvd cleared) so the later start/resume paths become
 * no-ops instead of repeatedly failing.
 */
static void cik_uvd_init(struct radeon_device *rdev)
{
	int r;

	if (!rdev->has_uvd)
		return;

	r = radeon_uvd_init(rdev);
	if (r) {
		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
		/*
		 * At this point rdev->uvd.vcpu_bo is NULL, which makes
		 * cik_uvd_start() bail out early and do nothing.  Going
		 * through that code would be pointless, hence UVD is
		 * disabled here.
		 */
		rdev->has_uvd = 0;
		return;
	}
	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
}
8173
8174 static void cik_uvd_start(struct radeon_device *rdev)
8175 {
8176         int r;
8177
8178         if (!rdev->has_uvd)
8179                 return;
8180
8181         r = radeon_uvd_resume(rdev);
8182         if (r) {
8183                 dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8184                 goto error;
8185         }
8186         r = uvd_v4_2_resume(rdev);
8187         if (r) {
8188                 dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8189                 goto error;
8190         }
8191         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8192         if (r) {
8193                 dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8194                 goto error;
8195         }
8196         return;
8197
8198 error:
8199         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8200 }
8201
8202 static void cik_uvd_resume(struct radeon_device *rdev)
8203 {
8204         struct radeon_ring *ring;
8205         int r;
8206
8207         if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8208                 return;
8209
8210         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8211         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8212         if (r) {
8213                 dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8214                 return;
8215         }
8216         r = uvd_v1_0_init(rdev);
8217         if (r) {
8218                 dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8219                 return;
8220         }
8221 }
8222
8223 static void cik_vce_init(struct radeon_device *rdev)
8224 {
8225         int r;
8226
8227         if (!rdev->has_vce)
8228                 return;
8229
8230         r = radeon_vce_init(rdev);
8231         if (r) {
8232                 dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8233                 /*
8234                  * At this point rdev->vce.vcpu_bo is NULL which trickles down
8235                  * to early fails cik_vce_start() and thus nothing happens
8236                  * there. So it is pointless to try to go through that code
8237                  * hence why we disable vce here.
8238                  */
8239                 rdev->has_vce = 0;
8240                 return;
8241         }
8242         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8243         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8244         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8245         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8246 }
8247
8248 static void cik_vce_start(struct radeon_device *rdev)
8249 {
8250         int r;
8251
8252         if (!rdev->has_vce)
8253                 return;
8254
8255         r = radeon_vce_resume(rdev);
8256         if (r) {
8257                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8258                 goto error;
8259         }
8260         r = vce_v2_0_resume(rdev);
8261         if (r) {
8262                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8263                 goto error;
8264         }
8265         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8266         if (r) {
8267                 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8268                 goto error;
8269         }
8270         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8271         if (r) {
8272                 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8273                 goto error;
8274         }
8275         return;
8276
8277 error:
8278         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8279         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8280 }
8281
8282 static void cik_vce_resume(struct radeon_device *rdev)
8283 {
8284         struct radeon_ring *ring;
8285         int r;
8286
8287         if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8288                 return;
8289
8290         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8291         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8292         if (r) {
8293                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8294                 return;
8295         }
8296         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8297         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8298         if (r) {
8299                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8300                 return;
8301         }
8302         r = vce_v1_0_init(rdev);
8303         if (r) {
8304                 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8305                 return;
8306         }
8307 }
8308
8309 /**
8310  * cik_startup - program the asic to a functional state
8311  *
8312  * @rdev: radeon_device pointer
8313  *
8314  * Programs the asic to a functional state (CIK).
8315  * Called by cik_init() and cik_resume().
8316  * Returns 0 for success, error for failure.
8317  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* dGPU only: load the MC ucode here unless DPM is enabled.
	 * NOTE(review): presumably the DPM path loads it itself - confirm.
	 */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers; APUs carry a family-specific
	 * save/restore register list, dGPUs do not.
	 */
	if (rdev->flags & RADEON_IS_IGP) {
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start the fence driver on every ring that will be used:
	 * gfx, both compute queues, both SDMA engines, then UVD/VCE.
	 */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD/VCE failures are non-fatal; they disable their own rings */
	cik_uvd_start(rdev);
	cik_vce_start(rdev);

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* ring nop packet: Hawaii with old firmware uses the type-2
	 * packet, everything else the type-3 NOP.
	 */
	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* non-fatal: UVD/VCE log and bail internally on failure */
	cik_uvd_resume(rdev);
	cik_vce_resume(rdev);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
8510
8511 /**
8512  * cik_resume - resume the asic to a functional state
8513  *
8514  * @rdev: radeon_device pointer
8515  *
8516  * Programs the asic to a functional state (CIK).
8517  * Called at resume.
8518  * Returns 0 for success, error for failure.
8519  */
8520 int cik_resume(struct radeon_device *rdev)
8521 {
8522         int r;
8523
8524         /* post card */
8525         atom_asic_init(rdev->mode_info.atom_context);
8526
8527         /* init golden registers */
8528         cik_init_golden_registers(rdev);
8529
8530         if (rdev->pm.pm_method == PM_METHOD_DPM)
8531                 radeon_pm_resume(rdev);
8532
8533         rdev->accel_working = true;
8534         r = cik_startup(rdev);
8535         if (r) {
8536                 DRM_ERROR("cik startup failed on resume\n");
8537                 rdev->accel_working = false;
8538                 return r;
8539         }
8540
8541         return r;
8542
8543 }
8544
8545 /**
8546  * cik_suspend - suspend the asic
8547  *
8548  * @rdev: radeon_device pointer
8549  *
8550  * Bring the chip into a state suitable for suspend (CIK).
8551  * Called at suspend.
8552  * Returns 0 for success.
8553  */
int cik_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the command processors and DMA engines */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	/* quiesce the optional media blocks if present */
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_suspend(rdev);
	/* tear down powergating/clockgating before disabling IRQs */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	/* GART goes last so earlier steps can still reach memory */
	cik_pcie_gart_disable(rdev);
	return 0;
}
8574
/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does pretty much
 * nothing more than call ASIC-specific functions. This should
 * also allow removing a bunch of callback functions like
 * vram_info.
 */
8581 /**
8582  * cik_init - asic specific driver and hw init
8583  *
8584  * @rdev: radeon_device pointer
8585  *
8586  * Setup asic specific driver variables and program the hw
8587  * to a functional state (CIK).
8588  * Called at driver startup.
8589  * Returns 0 for success, errors for failure.
8590  */
8591 int cik_init(struct radeon_device *rdev)
8592 {
8593         struct radeon_ring *ring;
8594         int r;
8595
8596         /* Read BIOS */
8597         if (!radeon_get_bios(rdev)) {
8598                 if (ASIC_IS_AVIVO(rdev))
8599                         return -EINVAL;
8600         }
8601         /* Must be an ATOMBIOS */
8602         if (!rdev->is_atom_bios) {
8603                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8604                 return -EINVAL;
8605         }
8606         r = radeon_atombios_init(rdev);
8607         if (r)
8608                 return r;
8609
8610         /* Post card if necessary */
8611         if (!radeon_card_posted(rdev)) {
8612                 if (!rdev->bios) {
8613                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8614                         return -EINVAL;
8615                 }
8616                 DRM_INFO("GPU not posted. posting now...\n");
8617                 atom_asic_init(rdev->mode_info.atom_context);
8618         }
8619         /* init golden registers */
8620         cik_init_golden_registers(rdev);
8621         /* Initialize scratch registers */
8622         cik_scratch_init(rdev);
8623         /* Initialize surface registers */
8624         radeon_surface_init(rdev);
8625         /* Initialize clocks */
8626         radeon_get_clock_info(rdev->ddev);
8627
8628         /* Fence driver */
8629         r = radeon_fence_driver_init(rdev);
8630         if (r)
8631                 return r;
8632
8633         /* initialize memory controller */
8634         r = cik_mc_init(rdev);
8635         if (r)
8636                 return r;
8637         /* Memory manager */
8638         r = radeon_bo_init(rdev);
8639         if (r)
8640                 return r;
8641
8642         if (rdev->flags & RADEON_IS_IGP) {
8643                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8644                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8645                         r = cik_init_microcode(rdev);
8646                         if (r) {
8647                                 DRM_ERROR("Failed to load firmware!\n");
8648                                 return r;
8649                         }
8650                 }
8651         } else {
8652                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8653                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8654                     !rdev->mc_fw) {
8655                         r = cik_init_microcode(rdev);
8656                         if (r) {
8657                                 DRM_ERROR("Failed to load firmware!\n");
8658                                 return r;
8659                         }
8660                 }
8661         }
8662
8663         /* Initialize power management */
8664         radeon_pm_init(rdev);
8665
8666         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8667         ring->ring_obj = NULL;
8668         r600_ring_init(rdev, ring, 1024 * 1024);
8669
8670         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8671         ring->ring_obj = NULL;
8672         r600_ring_init(rdev, ring, 1024 * 1024);
8673         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8674         if (r)
8675                 return r;
8676
8677         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8678         ring->ring_obj = NULL;
8679         r600_ring_init(rdev, ring, 1024 * 1024);
8680         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8681         if (r)
8682                 return r;
8683
8684         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8685         ring->ring_obj = NULL;
8686         r600_ring_init(rdev, ring, 256 * 1024);
8687
8688         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8689         ring->ring_obj = NULL;
8690         r600_ring_init(rdev, ring, 256 * 1024);
8691
8692         cik_uvd_init(rdev);
8693         cik_vce_init(rdev);
8694
8695         rdev->ih.ring_obj = NULL;
8696         r600_ih_ring_init(rdev, 64 * 1024);
8697
8698         r = r600_pcie_gart_init(rdev);
8699         if (r)
8700                 return r;
8701
8702         rdev->accel_working = true;
8703         r = cik_startup(rdev);
8704         if (r) {
8705                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8706                 cik_cp_fini(rdev);
8707                 cik_sdma_fini(rdev);
8708                 cik_irq_fini(rdev);
8709                 sumo_rlc_fini(rdev);
8710                 cik_mec_fini(rdev);
8711                 radeon_wb_fini(rdev);
8712                 radeon_ib_pool_fini(rdev);
8713                 radeon_vm_manager_fini(rdev);
8714                 radeon_irq_kms_fini(rdev);
8715                 cik_pcie_gart_fini(rdev);
8716                 rdev->accel_working = false;
8717         }
8718
8719         /* Don't start up if the MC ucode is missing.
8720          * The default clocks and voltages before the MC ucode
8721          * is loaded are not suffient for advanced operations.
8722          */
8723         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8724                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8725                 return -EINVAL;
8726         }
8727
8728         return 0;
8729 }
8730
8731 /**
8732  * cik_fini - asic specific driver and hw fini
8733  *
8734  * @rdev: radeon_device pointer
8735  *
8736  * Tear down the asic specific driver variables and program the hw
8737  * to an idle state (CIK).
8738  * Called at driver unload.
8739  */
void cik_fini(struct radeon_device *rdev)
{
	/* tear down in (roughly) the reverse order of cik_init()/cik_startup() */
	radeon_pm_fini(rdev);
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	/* media blocks: safe even if UVD/VCE were disabled during init */
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* BIOS copy was kmalloc'd at load time; release it last */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
8766
8767 void dce8_program_fmt(struct drm_encoder *encoder)
8768 {
8769         struct drm_device *dev = encoder->dev;
8770         struct radeon_device *rdev = dev->dev_private;
8771         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8772         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8773         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8774         int bpc = 0;
8775         u32 tmp = 0;
8776         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8777
8778         if (connector) {
8779                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8780                 bpc = radeon_get_monitor_bpc(connector);
8781                 dither = radeon_connector->dither;
8782         }
8783
8784         /* LVDS/eDP FMT is set up by atom */
8785         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8786                 return;
8787
8788         /* not needed for analog */
8789         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8790             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8791                 return;
8792
8793         if (bpc == 0)
8794                 return;
8795
8796         switch (bpc) {
8797         case 6:
8798                 if (dither == RADEON_FMT_DITHER_ENABLE)
8799                         /* XXX sort out optimal dither settings */
8800                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8801                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8802                 else
8803                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8804                 break;
8805         case 8:
8806                 if (dither == RADEON_FMT_DITHER_ENABLE)
8807                         /* XXX sort out optimal dither settings */
8808                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8809                                 FMT_RGB_RANDOM_ENABLE |
8810                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8811                 else
8812                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8813                 break;
8814         case 10:
8815                 if (dither == RADEON_FMT_DITHER_ENABLE)
8816                         /* XXX sort out optimal dither settings */
8817                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8818                                 FMT_RGB_RANDOM_ENABLE |
8819                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8820                 else
8821                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8822                 break;
8823         default:
8824                 /* not needed */
8825                 break;
8826         }
8827
8828         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8829 }
8830
8831 /* display watermark setup */
8832 /**
8833  * dce8_line_buffer_adjust - Set up the line buffer
8834  *
8835  * @rdev: radeon_device pointer
8836  * @radeon_crtc: the selected display controller
8837  * @mode: the current display mode on the selected display
8838  * controller
8839  *
8840  * Setup up the line buffer allocation for
8841  * the selected display controller (CIK).
8842  * Returns the line buffer size in pixels.
8843  */
8844 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8845                                    struct radeon_crtc *radeon_crtc,
8846                                    struct drm_display_mode *mode)
8847 {
8848         u32 tmp, buffer_alloc, i;
8849         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8850         /*
8851          * Line Buffer Setup
8852          * There are 6 line buffers, one for each display controllers.
8853          * There are 3 partitions per LB. Select the number of partitions
8854          * to enable based on the display width.  For display widths larger
8855          * than 4096, you need use to use 2 display controllers and combine
8856          * them using the stereo blender.
8857          */
8858         if (radeon_crtc->base.enabled && mode) {
8859                 if (mode->crtc_hdisplay < 1920) {
8860                         tmp = 1;
8861                         buffer_alloc = 2;
8862                 } else if (mode->crtc_hdisplay < 2560) {
8863                         tmp = 2;
8864                         buffer_alloc = 2;
8865                 } else if (mode->crtc_hdisplay < 4096) {
8866                         tmp = 0;
8867                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8868                 } else {
8869                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8870                         tmp = 0;
8871                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8872                 }
8873         } else {
8874                 tmp = 1;
8875                 buffer_alloc = 0;
8876         }
8877
8878         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8879                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8880
8881         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8882                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8883         for (i = 0; i < rdev->usec_timeout; i++) {
8884                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8885                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8886                         break;
8887                 udelay(1);
8888         }
8889
8890         if (radeon_crtc->base.enabled && mode) {
8891                 switch (tmp) {
8892                 case 0:
8893                 default:
8894                         return 4096 * 2;
8895                 case 1:
8896                         return 1920 * 2;
8897                 case 2:
8898                         return 2560 * 2;
8899                 }
8900         }
8901
8902         /* controller not enabled, so no lb used */
8903         return 0;
8904 }
8905
8906 /**
8907  * cik_get_number_of_dram_channels - get the number of dram channels
8908  *
8909  * @rdev: radeon_device pointer
8910  *
8911  * Look up the number of video ram channels (CIK).
8912  * Used for display watermark bandwidth calculations
8913  * Returns the number of dram channels
8914  */
8915 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8916 {
8917         u32 tmp = RREG32(MC_SHARED_CHMAP);
8918
8919         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8920         case 0:
8921         default:
8922                 return 1;
8923         case 1:
8924                 return 2;
8925         case 2:
8926                 return 4;
8927         case 3:
8928                 return 8;
8929         case 4:
8930                 return 3;
8931         case 5:
8932                 return 6;
8933         case 6:
8934                 return 10;
8935         case 7:
8936                 return 12;
8937         case 8:
8938                 return 16;
8939         }
8940 }
8941
/* Input parameters for the DCE8 display watermark calculations below. */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8957
8958 /**
8959  * dce8_dram_bandwidth - get the dram bandwidth
8960  *
8961  * @wm: watermark calculation data
8962  *
8963  * Calculate the raw dram bandwidth (CIK).
8964  * Used for display watermark bandwidth calculations
8965  * Returns the dram bandwidth in MBytes/s
8966  */
8967 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8968 {
8969         /* Calculate raw DRAM Bandwidth */
8970         fixed20_12 dram_efficiency; /* 0.7 */
8971         fixed20_12 yclk, dram_channels, bandwidth;
8972         fixed20_12 a;
8973
8974         a.full = dfixed_const(1000);
8975         yclk.full = dfixed_const(wm->yclk);
8976         yclk.full = dfixed_div(yclk, a);
8977         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8978         a.full = dfixed_const(10);
8979         dram_efficiency.full = dfixed_const(7);
8980         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8981         bandwidth.full = dfixed_mul(dram_channels, yclk);
8982         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8983
8984         return dfixed_trunc(bandwidth);
8985 }
8986
8987 /**
8988  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8989  *
8990  * @wm: watermark calculation data
8991  *
8992  * Calculate the dram bandwidth used for display (CIK).
8993  * Used for display watermark bandwidth calculations
8994  * Returns the dram bandwidth for display in MBytes/s
8995  */
8996 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8997 {
8998         /* Calculate DRAM Bandwidth and the part allocated to display. */
8999         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9000         fixed20_12 yclk, dram_channels, bandwidth;
9001         fixed20_12 a;
9002
9003         a.full = dfixed_const(1000);
9004         yclk.full = dfixed_const(wm->yclk);
9005         yclk.full = dfixed_div(yclk, a);
9006         dram_channels.full = dfixed_const(wm->dram_channels * 4);
9007         a.full = dfixed_const(10);
9008         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
9009         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9010         bandwidth.full = dfixed_mul(dram_channels, yclk);
9011         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9012
9013         return dfixed_trunc(bandwidth);
9014 }
9015
9016 /**
9017  * dce8_data_return_bandwidth - get the data return bandwidth
9018  *
9019  * @wm: watermark calculation data
9020  *
9021  * Calculate the data return bandwidth used for display (CIK).
9022  * Used for display watermark bandwidth calculations
9023  * Returns the data return bandwidth in MBytes/s
9024  */
9025 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9026 {
9027         /* Calculate the display Data return Bandwidth */
9028         fixed20_12 return_efficiency; /* 0.8 */
9029         fixed20_12 sclk, bandwidth;
9030         fixed20_12 a;
9031
9032         a.full = dfixed_const(1000);
9033         sclk.full = dfixed_const(wm->sclk);
9034         sclk.full = dfixed_div(sclk, a);
9035         a.full = dfixed_const(10);
9036         return_efficiency.full = dfixed_const(8);
9037         return_efficiency.full = dfixed_div(return_efficiency, a);
9038         a.full = dfixed_const(32);
9039         bandwidth.full = dfixed_mul(a, sclk);
9040         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9041
9042         return dfixed_trunc(bandwidth);
9043 }
9044
9045 /**
9046  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9047  *
9048  * @wm: watermark calculation data
9049  *
9050  * Calculate the dmif bandwidth used for display (CIK).
9051  * Used for display watermark bandwidth calculations
9052  * Returns the dmif bandwidth in MBytes/s
9053  */
9054 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9055 {
9056         /* Calculate the DMIF Request Bandwidth */
9057         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9058         fixed20_12 disp_clk, bandwidth;
9059         fixed20_12 a, b;
9060
9061         a.full = dfixed_const(1000);
9062         disp_clk.full = dfixed_const(wm->disp_clk);
9063         disp_clk.full = dfixed_div(disp_clk, a);
9064         a.full = dfixed_const(32);
9065         b.full = dfixed_mul(a, disp_clk);
9066
9067         a.full = dfixed_const(10);
9068         disp_clk_request_efficiency.full = dfixed_const(8);
9069         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9070
9071         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9072
9073         return dfixed_trunc(bandwidth);
9074 }
9075
9076 /**
9077  * dce8_available_bandwidth - get the min available bandwidth
9078  *
9079  * @wm: watermark calculation data
9080  *
9081  * Calculate the min available bandwidth used for display (CIK).
9082  * Used for display watermark bandwidth calculations
9083  * Returns the min available bandwidth in MBytes/s
9084  */
9085 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9086 {
9087         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9088         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9089         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9090         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9091
9092         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9093 }
9094
9095 /**
9096  * dce8_average_bandwidth - get the average available bandwidth
9097  *
9098  * @wm: watermark calculation data
9099  *
9100  * Calculate the average available bandwidth used for display (CIK).
9101  * Used for display watermark bandwidth calculations
9102  * Returns the average available bandwidth in MBytes/s
9103  */
9104 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9105 {
9106         /* Calculate the display mode Average Bandwidth
9107          * DisplayMode should contain the source and destination dimensions,
9108          * timing, etc.
9109          */
9110         fixed20_12 bpp;
9111         fixed20_12 line_time;
9112         fixed20_12 src_width;
9113         fixed20_12 bandwidth;
9114         fixed20_12 a;
9115
9116         a.full = dfixed_const(1000);
9117         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9118         line_time.full = dfixed_div(line_time, a);
9119         bpp.full = dfixed_const(wm->bytes_per_pixel);
9120         src_width.full = dfixed_const(wm->src_width);
9121         bandwidth.full = dfixed_mul(src_width, bpp);
9122         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9123         bandwidth.full = dfixed_div(bandwidth, line_time);
9124
9125         return dfixed_trunc(bandwidth);
9126 }
9127
/**
 * dce8_latency_watermark - get the latency watermark
 *
 * @wm: watermark calculation data
 *
 * Calculate the latency watermark (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the latency watermark in ns
 */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	/* time for one worst-case 512B x 8 chunk request to return, in ns */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	/* time for a 128B x 4 cursor line pair to return, in ns */
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* return time consumed by the other active heads */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* 4 source lines per destination line are needed when downscaling
	 * by more than 2x, downscaling with >= 3 vertical taps, using
	 * >= 5 vertical taps, or running interlaced at >= 2x downscale;
	 * otherwise 2 lines suffice
	 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* line buffer fill bandwidth is bounded by this head's share of the
	 * available bandwidth, the dmif drain rate, and the display clock
	 * consumption rate
	 */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);
	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
	tmp = min(dfixed_trunc(a), tmp);

	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);

	/* time to refill one destination line's worth of source data */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* pad the latency if the line cannot be refilled within active time */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
9186
9187 /**
9188  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9189  * average and available dram bandwidth
9190  *
9191  * @wm: watermark calculation data
9192  *
9193  * Check if the display average bandwidth fits in the display
9194  * dram bandwidth (CIK).
9195  * Used for display watermark bandwidth calculations
9196  * Returns true if the display fits, false if not.
9197  */
9198 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9199 {
9200         if (dce8_average_bandwidth(wm) <=
9201             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9202                 return true;
9203         else
9204                 return false;
9205 }
9206
9207 /**
9208  * dce8_average_bandwidth_vs_available_bandwidth - check
9209  * average and available bandwidth
9210  *
9211  * @wm: watermark calculation data
9212  *
9213  * Check if the display average bandwidth fits in the display
9214  * available bandwidth (CIK).
9215  * Used for display watermark bandwidth calculations
9216  * Returns true if the display fits, false if not.
9217  */
9218 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9219 {
9220         if (dce8_average_bandwidth(wm) <=
9221             (dce8_available_bandwidth(wm) / wm->num_heads))
9222                 return true;
9223         else
9224                 return false;
9225 }
9226
9227 /**
9228  * dce8_check_latency_hiding - check latency hiding
9229  *
9230  * @wm: watermark calculation data
9231  *
9232  * Check latency hiding (CIK).
9233  * Used for display watermark bandwidth calculations
9234  * Returns true if the display fits, false if not.
9235  */
9236 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9237 {
9238         u32 lb_partitions = wm->lb_size / wm->src_width;
9239         u32 line_time = wm->active_time + wm->blank_time;
9240         u32 latency_tolerant_lines;
9241         u32 latency_hiding;
9242         fixed20_12 a;
9243
9244         a.full = dfixed_const(1);
9245         if (wm->vsc.full > a.full)
9246                 latency_tolerant_lines = 1;
9247         else {
9248                 if (lb_partitions <= (wm->vtaps + 1))
9249                         latency_tolerant_lines = 1;
9250                 else
9251                         latency_tolerant_lines = 2;
9252         }
9253
9254         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9255
9256         if (dce8_latency_watermark(wm) <= latency_hiding)
9257                 return true;
9258         else
9259                 return false;
9260 }
9261
/**
 * dce8_program_watermarks - program display watermarks
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @lb_size: line buffer size
 * @num_heads: number of display controllers in use
 *
 * Calculate and program the display watermarks for the
 * selected display controller (CIK).  Watermark A is computed for the
 * high (performance) clocks and watermark B for the low clocks; both
 * are written through DPG_PIPE_LATENCY_CONTROL and saved on the crtc
 * for later use by DPM.
 */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 active_time;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* active and total line time in ns (line_time is capped to
		 * the 16-bit register field)
		 */
		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
					    (u32)mode->clock);
		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
					  (u32)mode->clock);
		line_time = min(line_time, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = active_time;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = active_time;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* Save number of lines the linebuffer leads before the scanout */
		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
9400
9401 /**
9402  * dce8_bandwidth_update - program display watermarks
9403  *
9404  * @rdev: radeon_device pointer
9405  *
9406  * Calculate and program the display watermarks and line
9407  * buffer allocation (CIK).
9408  */
9409 void dce8_bandwidth_update(struct radeon_device *rdev)
9410 {
9411         struct drm_display_mode *mode = NULL;
9412         u32 num_heads = 0, lb_size;
9413         int i;
9414
9415         if (!rdev->mode_info.mode_config_initialized)
9416                 return;
9417
9418         radeon_update_display_priority(rdev);
9419
9420         for (i = 0; i < rdev->num_crtc; i++) {
9421                 if (rdev->mode_info.crtcs[i]->base.enabled)
9422                         num_heads++;
9423         }
9424         for (i = 0; i < rdev->num_crtc; i++) {
9425                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9426                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9427                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9428         }
9429 }
9430
9431 /**
9432  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9433  *
9434  * @rdev: radeon_device pointer
9435  *
9436  * Fetches a GPU clock counter snapshot (SI).
9437  * Returns the 64 bit clock counter snapshot.
9438  */
9439 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9440 {
9441         uint64_t clock;
9442
9443         mutex_lock(&rdev->gpu_clock_mutex);
9444         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9445         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9446                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9447         mutex_unlock(&rdev->gpu_clock_mutex);
9448         return clock;
9449 }
9450
9451 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9452                              u32 cntl_reg, u32 status_reg)
9453 {
9454         int r, i;
9455         struct atom_clock_dividers dividers;
9456         uint32_t tmp;
9457
9458         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9459                                            clock, false, &dividers);
9460         if (r)
9461                 return r;
9462
9463         tmp = RREG32_SMC(cntl_reg);
9464         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9465         tmp |= dividers.post_divider;
9466         WREG32_SMC(cntl_reg, tmp);
9467
9468         for (i = 0; i < 100; i++) {
9469                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9470                         break;
9471                 mdelay(10);
9472         }
9473         if (i == 100)
9474                 return -ETIMEDOUT;
9475
9476         return 0;
9477 }
9478
9479 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9480 {
9481         int r = 0;
9482
9483         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9484         if (r)
9485                 return r;
9486
9487         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9488         return r;
9489 }
9490
9491 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9492 {
9493         int r, i;
9494         struct atom_clock_dividers dividers;
9495         u32 tmp;
9496
9497         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9498                                            ecclk, false, &dividers);
9499         if (r)
9500                 return r;
9501
9502         for (i = 0; i < 100; i++) {
9503                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9504                         break;
9505                 mdelay(10);
9506         }
9507         if (i == 100)
9508                 return -ETIMEDOUT;
9509
9510         tmp = RREG32_SMC(CG_ECLK_CNTL);
9511         tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9512         tmp |= dividers.post_divider;
9513         WREG32_SMC(CG_ECLK_CNTL, tmp);
9514
9515         for (i = 0; i < 100; i++) {
9516                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9517                         break;
9518                 mdelay(10);
9519         }
9520         if (i == 100)
9521                 return -ETIMEDOUT;
9522
9523         return 0;
9524 }
9525
/**
 * cik_pcie_gen3_enable - raise the PCIE link speed to gen2/gen3
 *
 * @rdev: radeon_device pointer
 *
 * Queries the platform's supported link speeds and, if a faster rate than
 * the current one is available, reprograms the link: for gen3 it first runs
 * the equalization retry handshake between the GPU and its upstream bridge,
 * then requests the speed change via PCIE_LC_SPEED_CNTL and the PCIe Link
 * Control 2 register. Silently returns on root-bus devices, IGPs, non-PCIE
 * parts, or when disabled with radeon.pcie_gen2=0.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* no upstream bridge to train the link against */
	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	/* user disabled faster link speeds via module parameter */
	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do if only gen1 is supported */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 (see checks below) */
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* config-space offsets of the PCIe capability on both ends */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save LNKCTL on both ends, then force HAWD
			 * (hardware autonomous width disable) on */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate up to the maximum detected width first,
			 * if the port supports renegotiation */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries; each pass quiesces the
			 * link, redoes EQ, then restores the saved link control
			 * fields on both the bridge and the GPU */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				/* snapshot LNKCTL/LNKCTL2 so selected bits can be
				 * restored after the EQ redo below */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				/* give the hardware time to complete equalization */
				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				/* restore bit 4 and bits 11:9 of LNKCTL2
				 * (compliance-related fields per the PCIe spec —
				 * TODO: replace magic masks with PCI_EXP_LNKCTL2_*
				 * defines once verified) */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				/* release the quiesce */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed (low 4 bits of LNKCTL2) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change and poll until the hardware clears
	 * the initiate bit (or we hit usec_timeout) */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9685
/**
 * cik_program_aspm - program PCIE ASPM (Active State Power Management)
 *
 * @rdev: radeon_device pointer
 *
 * Configures L0s/L1 inactivity thresholds, PLL power-down in L1, dynamic
 * lane power states, and — when the upstream bridge advertises clock power
 * management (CLKPM) — moves internal clocks off the PCIE reference clock
 * so it can be gated while the link sleeps. No-op on IGPs, non-PCIE parts,
 * or when disabled with radeon.aspm=0.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* local policy knobs; all features are hardwired enabled on CIK */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* user disabled ASPM via module parameter */
	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the advertised N_FTS (fast training sequences on L0s exit)
	 * to 0x24; all writes below are skipped when the register already
	 * holds the desired value (orig != data pattern) */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* build the L0s/L1 inactivity settings; the write is deferred to
	 * either the !disable_l1 branch or the else branch below */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		/* L1 enabled: set its inactivity timer and allow PMI-to-L1 */
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PLLs in both PIF blocks (PB0/PB1) to power
			 * off in the OFF and TXS2 link power states */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* CLKREQ-based clock switching only if the upstream
			 * bridge advertises clock power management */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* move the thermal monitor, deep sleep, zclk and
				 * MPLL bypass clocks off the PCIE refclk so it
				 * can be gated while the link is powered down */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: commit the L0s-only PCIE_LC_CNTL value
		 * built before the branch */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable light sleep for the PCIE block memories */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* if N_FTS reads back all-ones and the link is lane-reversed
		 * in both directions, back out the L0s inactivity timer */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}