Merge tag 'drm-misc-fixes-2017-11-02' of git://anongit.freedesktop.org/drm/drm-misc...
[sfrench/cifs-2.6.git] / drivers / gpu / drm / radeon / cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36
37 #define SH_MEM_CONFIG_GFX_DEFAULT \
38         ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
39
/*
 * Firmware images needed by the CIK-family ASICs handled by this file.
 * Each chip is listed twice: once with the legacy all-caps names and once
 * with the newer lower-case names — NOTE(review): which set is actually
 * loaded is decided elsewhere (not visible in this chunk); confirm in the
 * *_init_microcode paths.
 */

/* Bonaire (legacy naming) */
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

/* Bonaire (new naming) */
MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");
MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");

/* Hawaii (legacy naming) */
MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

/* Hawaii (new naming) */
MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");
MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");

/* Kaveri (legacy naming) — APU, so no mc/smc images */
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

/* Kaveri (new naming) — adds a second MEC image */
MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

/* Kabini (legacy naming) */
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

/* Kabini (new naming) */
MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

/* Mullins (legacy naming) */
MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

/* Mullins (new naming) */
MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");
122
/* Helpers shared with the r600/evergreen/si/cik_sdma/vce ASIC files. */
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
/* Forward declarations of file-local routines defined later in this file. */
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);
147
148 /**
149  * cik_get_allowed_info_register - fetch the register for the info ioctl
150  *
151  * @rdev: radeon_device pointer
152  * @reg: register offset in bytes
153  * @val: register value
154  *
155  * Returns 0 for success or -EINVAL for an invalid register
156  *
157  */
158 int cik_get_allowed_info_register(struct radeon_device *rdev,
159                                   u32 reg, u32 *val)
160 {
161         switch (reg) {
162         case GRBM_STATUS:
163         case GRBM_STATUS2:
164         case GRBM_STATUS_SE0:
165         case GRBM_STATUS_SE1:
166         case GRBM_STATUS_SE2:
167         case GRBM_STATUS_SE3:
168         case SRBM_STATUS:
169         case SRBM_STATUS2:
170         case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
171         case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
172         case UVD_STATUS:
173         /* TODO VCE */
174                 *val = RREG32(reg);
175                 return 0;
176         default:
177                 return -EINVAL;
178         }
179 }
180
181 /*
182  * Indirect registers accessor
183  */
184 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
185 {
186         unsigned long flags;
187         u32 r;
188
189         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
190         WREG32(CIK_DIDT_IND_INDEX, (reg));
191         r = RREG32(CIK_DIDT_IND_DATA);
192         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
193         return r;
194 }
195
196 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
197 {
198         unsigned long flags;
199
200         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
201         WREG32(CIK_DIDT_IND_INDEX, (reg));
202         WREG32(CIK_DIDT_IND_DATA, (v));
203         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
204 }
205
206 /* get temperature in millidegrees */
207 int ci_get_temp(struct radeon_device *rdev)
208 {
209         u32 temp;
210         int actual_temp = 0;
211
212         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
213                 CTF_TEMP_SHIFT;
214
215         if (temp & 0x200)
216                 actual_temp = 255;
217         else
218                 actual_temp = temp & 0x1ff;
219
220         actual_temp = actual_temp * 1000;
221
222         return actual_temp;
223 }
224
225 /* get temperature in millidegrees */
226 int kv_get_temp(struct radeon_device *rdev)
227 {
228         u32 temp;
229         int actual_temp = 0;
230
231         temp = RREG32_SMC(0xC0300E0C);
232
233         if (temp)
234                 actual_temp = (temp / 8) - 49;
235         else
236                 actual_temp = 0;
237
238         actual_temp = actual_temp * 1000;
239
240         return actual_temp;
241 }
242
/*
 * Indirect registers accessor
 *
 * Read a PCIE port register via the PCIE_INDEX/PCIE_DATA window.
 * The didt-style lock serializes the index/data pair.
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* read back to post the index write */
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}
258
/*
 * Write a PCIE port register via the PCIE_INDEX/PCIE_DATA window.
 * Mirrors cik_pciep_rreg(); both the index and the data writes are
 * followed by a read-back so they post before the lock is dropped.
 */
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* post the index write */
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);	/* post the data write */
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
270
/*
 * RLC save/restore register list for Spectre (Kaveri) GFX.
 * Entries appear to be pairs: a word combining an instance/broadcast
 * select in the high 16 bits with a dword register offset in the low
 * 16 bits, followed by a data/padding word; the bare 0x3 and 0x5
 * values look like count markers introducing the sections that follow.
 * NOTE(review): format inferred from layout — confirm against the RLC
 * microcode documentation before relying on it.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
717
718 static const u32 kalindi_rlc_save_restore_register_list[] =
719 {
720         (0x0e00 << 16) | (0xc12c >> 2),
721         0x00000000,
722         (0x0e00 << 16) | (0xc140 >> 2),
723         0x00000000,
724         (0x0e00 << 16) | (0xc150 >> 2),
725         0x00000000,
726         (0x0e00 << 16) | (0xc15c >> 2),
727         0x00000000,
728         (0x0e00 << 16) | (0xc168 >> 2),
729         0x00000000,
730         (0x0e00 << 16) | (0xc170 >> 2),
731         0x00000000,
732         (0x0e00 << 16) | (0xc204 >> 2),
733         0x00000000,
734         (0x0e00 << 16) | (0xc2b4 >> 2),
735         0x00000000,
736         (0x0e00 << 16) | (0xc2b8 >> 2),
737         0x00000000,
738         (0x0e00 << 16) | (0xc2bc >> 2),
739         0x00000000,
740         (0x0e00 << 16) | (0xc2c0 >> 2),
741         0x00000000,
742         (0x0e00 << 16) | (0x8228 >> 2),
743         0x00000000,
744         (0x0e00 << 16) | (0x829c >> 2),
745         0x00000000,
746         (0x0e00 << 16) | (0x869c >> 2),
747         0x00000000,
748         (0x0600 << 16) | (0x98f4 >> 2),
749         0x00000000,
750         (0x0e00 << 16) | (0x98f8 >> 2),
751         0x00000000,
752         (0x0e00 << 16) | (0x9900 >> 2),
753         0x00000000,
754         (0x0e00 << 16) | (0xc260 >> 2),
755         0x00000000,
756         (0x0e00 << 16) | (0x90e8 >> 2),
757         0x00000000,
758         (0x0e00 << 16) | (0x3c000 >> 2),
759         0x00000000,
760         (0x0e00 << 16) | (0x3c00c >> 2),
761         0x00000000,
762         (0x0e00 << 16) | (0x8c1c >> 2),
763         0x00000000,
764         (0x0e00 << 16) | (0x9700 >> 2),
765         0x00000000,
766         (0x0e00 << 16) | (0xcd20 >> 2),
767         0x00000000,
768         (0x4e00 << 16) | (0xcd20 >> 2),
769         0x00000000,
770         (0x5e00 << 16) | (0xcd20 >> 2),
771         0x00000000,
772         (0x6e00 << 16) | (0xcd20 >> 2),
773         0x00000000,
774         (0x7e00 << 16) | (0xcd20 >> 2),
775         0x00000000,
776         (0x0e00 << 16) | (0x89bc >> 2),
777         0x00000000,
778         (0x0e00 << 16) | (0x8900 >> 2),
779         0x00000000,
780         0x3,
781         (0x0e00 << 16) | (0xc130 >> 2),
782         0x00000000,
783         (0x0e00 << 16) | (0xc134 >> 2),
784         0x00000000,
785         (0x0e00 << 16) | (0xc1fc >> 2),
786         0x00000000,
787         (0x0e00 << 16) | (0xc208 >> 2),
788         0x00000000,
789         (0x0e00 << 16) | (0xc264 >> 2),
790         0x00000000,
791         (0x0e00 << 16) | (0xc268 >> 2),
792         0x00000000,
793         (0x0e00 << 16) | (0xc26c >> 2),
794         0x00000000,
795         (0x0e00 << 16) | (0xc270 >> 2),
796         0x00000000,
797         (0x0e00 << 16) | (0xc274 >> 2),
798         0x00000000,
799         (0x0e00 << 16) | (0xc28c >> 2),
800         0x00000000,
801         (0x0e00 << 16) | (0xc290 >> 2),
802         0x00000000,
803         (0x0e00 << 16) | (0xc294 >> 2),
804         0x00000000,
805         (0x0e00 << 16) | (0xc298 >> 2),
806         0x00000000,
807         (0x0e00 << 16) | (0xc2a0 >> 2),
808         0x00000000,
809         (0x0e00 << 16) | (0xc2a4 >> 2),
810         0x00000000,
811         (0x0e00 << 16) | (0xc2a8 >> 2),
812         0x00000000,
813         (0x0e00 << 16) | (0xc2ac >> 2),
814         0x00000000,
815         (0x0e00 << 16) | (0x301d0 >> 2),
816         0x00000000,
817         (0x0e00 << 16) | (0x30238 >> 2),
818         0x00000000,
819         (0x0e00 << 16) | (0x30250 >> 2),
820         0x00000000,
821         (0x0e00 << 16) | (0x30254 >> 2),
822         0x00000000,
823         (0x0e00 << 16) | (0x30258 >> 2),
824         0x00000000,
825         (0x0e00 << 16) | (0x3025c >> 2),
826         0x00000000,
827         (0x4e00 << 16) | (0xc900 >> 2),
828         0x00000000,
829         (0x5e00 << 16) | (0xc900 >> 2),
830         0x00000000,
831         (0x6e00 << 16) | (0xc900 >> 2),
832         0x00000000,
833         (0x7e00 << 16) | (0xc900 >> 2),
834         0x00000000,
835         (0x4e00 << 16) | (0xc904 >> 2),
836         0x00000000,
837         (0x5e00 << 16) | (0xc904 >> 2),
838         0x00000000,
839         (0x6e00 << 16) | (0xc904 >> 2),
840         0x00000000,
841         (0x7e00 << 16) | (0xc904 >> 2),
842         0x00000000,
843         (0x4e00 << 16) | (0xc908 >> 2),
844         0x00000000,
845         (0x5e00 << 16) | (0xc908 >> 2),
846         0x00000000,
847         (0x6e00 << 16) | (0xc908 >> 2),
848         0x00000000,
849         (0x7e00 << 16) | (0xc908 >> 2),
850         0x00000000,
851         (0x4e00 << 16) | (0xc90c >> 2),
852         0x00000000,
853         (0x5e00 << 16) | (0xc90c >> 2),
854         0x00000000,
855         (0x6e00 << 16) | (0xc90c >> 2),
856         0x00000000,
857         (0x7e00 << 16) | (0xc90c >> 2),
858         0x00000000,
859         (0x4e00 << 16) | (0xc910 >> 2),
860         0x00000000,
861         (0x5e00 << 16) | (0xc910 >> 2),
862         0x00000000,
863         (0x6e00 << 16) | (0xc910 >> 2),
864         0x00000000,
865         (0x7e00 << 16) | (0xc910 >> 2),
866         0x00000000,
867         (0x0e00 << 16) | (0xc99c >> 2),
868         0x00000000,
869         (0x0e00 << 16) | (0x9834 >> 2),
870         0x00000000,
871         (0x0000 << 16) | (0x30f00 >> 2),
872         0x00000000,
873         (0x0000 << 16) | (0x30f04 >> 2),
874         0x00000000,
875         (0x0000 << 16) | (0x30f08 >> 2),
876         0x00000000,
877         (0x0000 << 16) | (0x30f0c >> 2),
878         0x00000000,
879         (0x0600 << 16) | (0x9b7c >> 2),
880         0x00000000,
881         (0x0e00 << 16) | (0x8a14 >> 2),
882         0x00000000,
883         (0x0e00 << 16) | (0x8a18 >> 2),
884         0x00000000,
885         (0x0600 << 16) | (0x30a00 >> 2),
886         0x00000000,
887         (0x0e00 << 16) | (0x8bf0 >> 2),
888         0x00000000,
889         (0x0e00 << 16) | (0x8bcc >> 2),
890         0x00000000,
891         (0x0e00 << 16) | (0x8b24 >> 2),
892         0x00000000,
893         (0x0e00 << 16) | (0x30a04 >> 2),
894         0x00000000,
895         (0x0600 << 16) | (0x30a10 >> 2),
896         0x00000000,
897         (0x0600 << 16) | (0x30a14 >> 2),
898         0x00000000,
899         (0x0600 << 16) | (0x30a18 >> 2),
900         0x00000000,
901         (0x0600 << 16) | (0x30a2c >> 2),
902         0x00000000,
903         (0x0e00 << 16) | (0xc700 >> 2),
904         0x00000000,
905         (0x0e00 << 16) | (0xc704 >> 2),
906         0x00000000,
907         (0x0e00 << 16) | (0xc708 >> 2),
908         0x00000000,
909         (0x0e00 << 16) | (0xc768 >> 2),
910         0x00000000,
911         (0x0400 << 16) | (0xc770 >> 2),
912         0x00000000,
913         (0x0400 << 16) | (0xc774 >> 2),
914         0x00000000,
915         (0x0400 << 16) | (0xc798 >> 2),
916         0x00000000,
917         (0x0400 << 16) | (0xc79c >> 2),
918         0x00000000,
919         (0x0e00 << 16) | (0x9100 >> 2),
920         0x00000000,
921         (0x0e00 << 16) | (0x3c010 >> 2),
922         0x00000000,
923         (0x0e00 << 16) | (0x8c00 >> 2),
924         0x00000000,
925         (0x0e00 << 16) | (0x8c04 >> 2),
926         0x00000000,
927         (0x0e00 << 16) | (0x8c20 >> 2),
928         0x00000000,
929         (0x0e00 << 16) | (0x8c38 >> 2),
930         0x00000000,
931         (0x0e00 << 16) | (0x8c3c >> 2),
932         0x00000000,
933         (0x0e00 << 16) | (0xae00 >> 2),
934         0x00000000,
935         (0x0e00 << 16) | (0x9604 >> 2),
936         0x00000000,
937         (0x0e00 << 16) | (0xac08 >> 2),
938         0x00000000,
939         (0x0e00 << 16) | (0xac0c >> 2),
940         0x00000000,
941         (0x0e00 << 16) | (0xac10 >> 2),
942         0x00000000,
943         (0x0e00 << 16) | (0xac14 >> 2),
944         0x00000000,
945         (0x0e00 << 16) | (0xac58 >> 2),
946         0x00000000,
947         (0x0e00 << 16) | (0xac68 >> 2),
948         0x00000000,
949         (0x0e00 << 16) | (0xac6c >> 2),
950         0x00000000,
951         (0x0e00 << 16) | (0xac70 >> 2),
952         0x00000000,
953         (0x0e00 << 16) | (0xac74 >> 2),
954         0x00000000,
955         (0x0e00 << 16) | (0xac78 >> 2),
956         0x00000000,
957         (0x0e00 << 16) | (0xac7c >> 2),
958         0x00000000,
959         (0x0e00 << 16) | (0xac80 >> 2),
960         0x00000000,
961         (0x0e00 << 16) | (0xac84 >> 2),
962         0x00000000,
963         (0x0e00 << 16) | (0xac88 >> 2),
964         0x00000000,
965         (0x0e00 << 16) | (0xac8c >> 2),
966         0x00000000,
967         (0x0e00 << 16) | (0x970c >> 2),
968         0x00000000,
969         (0x0e00 << 16) | (0x9714 >> 2),
970         0x00000000,
971         (0x0e00 << 16) | (0x9718 >> 2),
972         0x00000000,
973         (0x0e00 << 16) | (0x971c >> 2),
974         0x00000000,
975         (0x0e00 << 16) | (0x31068 >> 2),
976         0x00000000,
977         (0x4e00 << 16) | (0x31068 >> 2),
978         0x00000000,
979         (0x5e00 << 16) | (0x31068 >> 2),
980         0x00000000,
981         (0x6e00 << 16) | (0x31068 >> 2),
982         0x00000000,
983         (0x7e00 << 16) | (0x31068 >> 2),
984         0x00000000,
985         (0x0e00 << 16) | (0xcd10 >> 2),
986         0x00000000,
987         (0x0e00 << 16) | (0xcd14 >> 2),
988         0x00000000,
989         (0x0e00 << 16) | (0x88b0 >> 2),
990         0x00000000,
991         (0x0e00 << 16) | (0x88b4 >> 2),
992         0x00000000,
993         (0x0e00 << 16) | (0x88b8 >> 2),
994         0x00000000,
995         (0x0e00 << 16) | (0x88bc >> 2),
996         0x00000000,
997         (0x0400 << 16) | (0x89c0 >> 2),
998         0x00000000,
999         (0x0e00 << 16) | (0x88c4 >> 2),
1000         0x00000000,
1001         (0x0e00 << 16) | (0x88c8 >> 2),
1002         0x00000000,
1003         (0x0e00 << 16) | (0x88d0 >> 2),
1004         0x00000000,
1005         (0x0e00 << 16) | (0x88d4 >> 2),
1006         0x00000000,
1007         (0x0e00 << 16) | (0x88d8 >> 2),
1008         0x00000000,
1009         (0x0e00 << 16) | (0x8980 >> 2),
1010         0x00000000,
1011         (0x0e00 << 16) | (0x30938 >> 2),
1012         0x00000000,
1013         (0x0e00 << 16) | (0x3093c >> 2),
1014         0x00000000,
1015         (0x0e00 << 16) | (0x30940 >> 2),
1016         0x00000000,
1017         (0x0e00 << 16) | (0x89a0 >> 2),
1018         0x00000000,
1019         (0x0e00 << 16) | (0x30900 >> 2),
1020         0x00000000,
1021         (0x0e00 << 16) | (0x30904 >> 2),
1022         0x00000000,
1023         (0x0e00 << 16) | (0x89b4 >> 2),
1024         0x00000000,
1025         (0x0e00 << 16) | (0x3e1fc >> 2),
1026         0x00000000,
1027         (0x0e00 << 16) | (0x3c210 >> 2),
1028         0x00000000,
1029         (0x0e00 << 16) | (0x3c214 >> 2),
1030         0x00000000,
1031         (0x0e00 << 16) | (0x3c218 >> 2),
1032         0x00000000,
1033         (0x0e00 << 16) | (0x8904 >> 2),
1034         0x00000000,
1035         0x5,
1036         (0x0e00 << 16) | (0x8c28 >> 2),
1037         (0x0e00 << 16) | (0x8c2c >> 2),
1038         (0x0e00 << 16) | (0x8c30 >> 2),
1039         (0x0e00 << 16) | (0x8c34 >> 2),
1040         (0x0e00 << 16) | (0x9600 >> 2),
1041 };
1042
/*
 * Bonaire SPM (streaming performance monitor) golden register override.
 * Entries are {offset, mask, value} triples applied by
 * radeon_program_register_sequence() (see cik_init_golden_registers()).
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1047
/*
 * Bonaire golden settings shared with the other CIK parts (same four CP
 * registers are programmed in every *_golden_common_registers table).
 * Entries are {offset, mask, value} triples applied by
 * radeon_program_register_sequence().
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1055
/*
 * Bonaire-specific golden register settings, applied once at ASIC init.
 * Entries are {offset, mask, value} triples consumed by
 * radeon_program_register_sequence(); the raw values are
 * hardware-validated constants and must not be hand-edited.
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1100
/*
 * Bonaire medium-grain / coarse-grain clock-gating (MGCG/CGCG) init
 * sequence.  Entries are {offset, mask, value} triples applied by
 * radeon_program_register_sequence().  The long 0x3c020..0x3c0a8 run
 * appears to program a repeating per-block gating-delay pattern
 * (00010000/00030002/00040007/00060005/00090008) — values are
 * hardware-specific constants; do not hand-edit.
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1186
/*
 * Spectre (Kaveri GFX) SPM golden register override; identical to the
 * Bonaire entry.  {offset, mask, value} triples for
 * radeon_program_register_sequence().
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1191
/*
 * Spectre common golden settings — same four CP registers/values as the
 * other CIK *_golden_common_registers tables.  {offset, mask, value}
 * triples for radeon_program_register_sequence().
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1199
/*
 * Spectre (Kaveri) specific golden register settings.
 * {offset, mask, value} triples for radeon_program_register_sequence();
 * hardware-validated constants — do not hand-edit.
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1228
/*
 * Spectre (Kaveri) MGCG/CGCG clock-gating init sequence.
 * {offset, mask, value} triples for radeon_program_register_sequence().
 * Differs from the Bonaire table mainly in the 0x3c2c0/c4/c8 values and
 * the longer 0x3c020..0x3c0bc delay-pattern run; hardware-specific
 * constants — do not hand-edit.
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1319
/*
 * Kalindi (Kabini GFX) SPM golden register override; identical to the
 * Bonaire/Spectre entry.  {offset, mask, value} triples for
 * radeon_program_register_sequence().
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1324
/*
 * Kalindi common golden settings — same four CP registers/values as the
 * other CIK *_golden_common_registers tables.  {offset, mask, value}
 * triples for radeon_program_register_sequence().
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1332
/*
 * Kalindi (Kabini) specific golden register settings.
 * {offset, mask, value} triples for radeon_program_register_sequence();
 * hardware-validated constants — do not hand-edit.
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1366
/*
 * Kalindi (Kabini; also reused for Mullins) MGCG/CGCG clock-gating init
 * sequence.  {offset, mask, value} triples for
 * radeon_program_register_sequence().  Shorter delay-pattern run
 * (0x3c020..0x3c044) than the discrete parts; hardware-specific
 * constants — do not hand-edit.
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1425
/*
 * Hawaii SPM golden register override; identical to the other CIK parts.
 * {offset, mask, value} triples for radeon_program_register_sequence().
 */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1430
/*
 * Hawaii common golden settings.  Unlike the other CIK parts, Hawaii
 * programs a different register set here (no 0xc770..0xc79c entries —
 * those appear in hawaii_golden_registers instead).
 * {offset, mask, value} triples for radeon_program_register_sequence().
 */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1439
/*
 * Hawaii-specific golden register settings.
 * {offset, mask, value} triples for radeon_program_register_sequence();
 * hardware-validated constants — do not hand-edit.
 */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1479
/*
 * Hawaii MGCG/CGCG clock-gating init sequence.
 * {offset, mask, value} triples for radeon_program_register_sequence().
 * Note the first entry writes 0xfffffffd where the other parts write
 * 0xfffffffc, and the delay-pattern run extends further
 * (0x3c020..0x3c0f8); hardware-specific constants — do not hand-edit.
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1590
/*
 * Godavari (Mullins) specific golden register settings.
 * {offset, mask, value} triples for radeon_program_register_sequence();
 * hardware-validated constants — do not hand-edit.
 */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/*
	 * NOTE(review): offset 0x98302 looks like a typo for 0x9834 —
	 * every other CIK table programs {0x9834, 0xf00fffff, 0x00000400}
	 * right after 0x9830 (see kalindi/hawaii).  Confirm against hw
	 * docs before changing; left as-is to preserve shipped behavior.
	 */
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1626
1627
1628 static void cik_init_golden_registers(struct radeon_device *rdev)
1629 {
1630         /* Some of the registers might be dependent on GRBM_GFX_INDEX */
1631         mutex_lock(&rdev->grbm_idx_mutex);
1632         switch (rdev->family) {
1633         case CHIP_BONAIRE:
1634                 radeon_program_register_sequence(rdev,
1635                                                  bonaire_mgcg_cgcg_init,
1636                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1637                 radeon_program_register_sequence(rdev,
1638                                                  bonaire_golden_registers,
1639                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1640                 radeon_program_register_sequence(rdev,
1641                                                  bonaire_golden_common_registers,
1642                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1643                 radeon_program_register_sequence(rdev,
1644                                                  bonaire_golden_spm_registers,
1645                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1646                 break;
1647         case CHIP_KABINI:
1648                 radeon_program_register_sequence(rdev,
1649                                                  kalindi_mgcg_cgcg_init,
1650                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1651                 radeon_program_register_sequence(rdev,
1652                                                  kalindi_golden_registers,
1653                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1654                 radeon_program_register_sequence(rdev,
1655                                                  kalindi_golden_common_registers,
1656                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1657                 radeon_program_register_sequence(rdev,
1658                                                  kalindi_golden_spm_registers,
1659                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1660                 break;
1661         case CHIP_MULLINS:
1662                 radeon_program_register_sequence(rdev,
1663                                                  kalindi_mgcg_cgcg_init,
1664                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1665                 radeon_program_register_sequence(rdev,
1666                                                  godavari_golden_registers,
1667                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1668                 radeon_program_register_sequence(rdev,
1669                                                  kalindi_golden_common_registers,
1670                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1671                 radeon_program_register_sequence(rdev,
1672                                                  kalindi_golden_spm_registers,
1673                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1674                 break;
1675         case CHIP_KAVERI:
1676                 radeon_program_register_sequence(rdev,
1677                                                  spectre_mgcg_cgcg_init,
1678                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1679                 radeon_program_register_sequence(rdev,
1680                                                  spectre_golden_registers,
1681                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1682                 radeon_program_register_sequence(rdev,
1683                                                  spectre_golden_common_registers,
1684                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1685                 radeon_program_register_sequence(rdev,
1686                                                  spectre_golden_spm_registers,
1687                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1688                 break;
1689         case CHIP_HAWAII:
1690                 radeon_program_register_sequence(rdev,
1691                                                  hawaii_mgcg_cgcg_init,
1692                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1693                 radeon_program_register_sequence(rdev,
1694                                                  hawaii_golden_registers,
1695                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1696                 radeon_program_register_sequence(rdev,
1697                                                  hawaii_golden_common_registers,
1698                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1699                 radeon_program_register_sequence(rdev,
1700                                                  hawaii_golden_spm_registers,
1701                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1702                 break;
1703         default:
1704                 break;
1705         }
1706         mutex_unlock(&rdev->grbm_idx_mutex);
1707 }
1708
1709 /**
1710  * cik_get_xclk - get the xclk
1711  *
1712  * @rdev: radeon_device pointer
1713  *
1714  * Returns the reference clock used by the gfx engine
1715  * (CIK).
1716  */
1717 u32 cik_get_xclk(struct radeon_device *rdev)
1718 {
1719         u32 reference_clock = rdev->clock.spll.reference_freq;
1720
1721         if (rdev->flags & RADEON_IS_IGP) {
1722                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1723                         return reference_clock / 2;
1724         } else {
1725                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1726                         return reference_clock / 4;
1727         }
1728         return reference_clock;
1729 }
1730
1731 /**
1732  * cik_mm_rdoorbell - read a doorbell dword
1733  *
1734  * @rdev: radeon_device pointer
1735  * @index: doorbell index
1736  *
1737  * Returns the value in the doorbell aperture at the
1738  * requested doorbell index (CIK).
1739  */
1740 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1741 {
1742         if (index < rdev->doorbell.num_doorbells) {
1743                 return readl(rdev->doorbell.ptr + index);
1744         } else {
1745                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1746                 return 0;
1747         }
1748 }
1749
1750 /**
1751  * cik_mm_wdoorbell - write a doorbell dword
1752  *
1753  * @rdev: radeon_device pointer
1754  * @index: doorbell index
1755  * @v: value to write
1756  *
1757  * Writes @v to the doorbell aperture at the
1758  * requested doorbell index (CIK).
1759  */
1760 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1761 {
1762         if (index < rdev->doorbell.num_doorbells) {
1763                 writel(v, rdev->doorbell.ptr + index);
1764         } else {
1765                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1766         }
1767 }
1768
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC io debug init sequence: {MC_SEQ_IO_DEBUG index, data} pairs
 * programmed by ci_mc_load_microcode() before uploading a legacy
 * (headerless) MC ucode image.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1810
#define HAWAII_IO_MC_REGS_SIZE 22

/* Hawaii MC io debug init sequence: {MC_SEQ_IO_DEBUG index, data} pairs
 * programmed by ci_mc_load_microcode() before uploading a legacy
 * (headerless) MC ucode image.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1838
1839
1840 /**
1841  * cik_srbm_select - select specific register instances
1842  *
1843  * @rdev: radeon_device pointer
1844  * @me: selected ME (micro engine)
1845  * @pipe: pipe
1846  * @queue: queue
1847  * @vmid: VMID
1848  *
1849  * Switches the currently active registers instances.  Some
1850  * registers are instanced per VMID, others are instanced per
1851  * me/pipe/queue combination.
1852  */
1853 static void cik_srbm_select(struct radeon_device *rdev,
1854                             u32 me, u32 pipe, u32 queue, u32 vmid)
1855 {
1856         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1857                              MEID(me & 0x3) |
1858                              VMID(vmid & 0xf) |
1859                              QUEUEID(queue & 0x7));
1860         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1861 }
1862
/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;
	const __le32 *new_fw_data = NULL;
	u32 running, tmp;
	u32 *io_mc_regs = NULL;
	const __le32 *new_io_mc_regs = NULL;
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		/* new-style image: sizes and offsets come from the ucode header */
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);

		/* io debug entries are {index, data} dword pairs, hence / (4 * 2) */
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		/* legacy image: raw big-endian dwords, io regs come from the
		 * per-family static tables above
		 */
		ucode_size = rdev->mc_fw->size / 4;

		switch (rdev->family) {
		case CHIP_BONAIRE:
			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
			regs_size = BONAIRE_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAWAII:
			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
			regs_size = HAWAII_IO_MC_REGS_SIZE;
			break;
		default:
			return -EINVAL;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	/* only upload if the MC ucode engine is not already running */
	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}

		tmp = RREG32(MC_SEQ_MISC0);
		/* extra io debug overrides for device 0x6649 with this
		 * MC_SEQ_MISC0 pattern -- NOTE(review): the rationale is not
		 * visible here; presumably a board/memory-specific workaround.
		 */
		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
		}

		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete; a timeout here is not
		 * treated as an error (we fall through and return 0)
		 */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}
1968
1969 /**
1970  * cik_init_microcode - load ucode images from disk
1971  *
1972  * @rdev: radeon_device pointer
1973  *
1974  * Use the firmware interface to load the ucode images into
1975  * the driver (not loaded into hw).
1976  * Returns 0 on success, error on failure.
1977  */
1978 static int cik_init_microcode(struct radeon_device *rdev)
1979 {
1980         const char *chip_name;
1981         const char *new_chip_name;
1982         size_t pfp_req_size, me_req_size, ce_req_size,
1983                 mec_req_size, rlc_req_size, mc_req_size = 0,
1984                 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1985         char fw_name[30];
1986         int new_fw = 0;
1987         int err;
1988         int num_fw;
1989         bool new_smc = false;
1990
1991         DRM_DEBUG("\n");
1992
1993         switch (rdev->family) {
1994         case CHIP_BONAIRE:
1995                 chip_name = "BONAIRE";
1996                 if ((rdev->pdev->revision == 0x80) ||
1997                     (rdev->pdev->revision == 0x81) ||
1998                     (rdev->pdev->device == 0x665f))
1999                         new_smc = true;
2000                 new_chip_name = "bonaire";
2001                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2002                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2003                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2004                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2005                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2006                 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2007                 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2008                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2009                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2010                 num_fw = 8;
2011                 break;
2012         case CHIP_HAWAII:
2013                 chip_name = "HAWAII";
2014                 if (rdev->pdev->revision == 0x80)
2015                         new_smc = true;
2016                 new_chip_name = "hawaii";
2017                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2018                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2019                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2020                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2021                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2022                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2023                 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2024                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2025                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2026                 num_fw = 8;
2027                 break;
2028         case CHIP_KAVERI:
2029                 chip_name = "KAVERI";
2030                 new_chip_name = "kaveri";
2031                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2032                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2033                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2034                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2035                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2036                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2037                 num_fw = 7;
2038                 break;
2039         case CHIP_KABINI:
2040                 chip_name = "KABINI";
2041                 new_chip_name = "kabini";
2042                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2043                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2044                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2045                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2046                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2047                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2048                 num_fw = 6;
2049                 break;
2050         case CHIP_MULLINS:
2051                 chip_name = "MULLINS";
2052                 new_chip_name = "mullins";
2053                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2054                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2055                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2056                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2057                 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2058                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2059                 num_fw = 6;
2060                 break;
2061         default: BUG();
2062         }
2063
2064         DRM_INFO("Loading %s Microcode\n", new_chip_name);
2065
2066         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2067         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2068         if (err) {
2069                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2070                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2071                 if (err)
2072                         goto out;
2073                 if (rdev->pfp_fw->size != pfp_req_size) {
2074                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2075                                rdev->pfp_fw->size, fw_name);
2076                         err = -EINVAL;
2077                         goto out;
2078                 }
2079         } else {
2080                 err = radeon_ucode_validate(rdev->pfp_fw);
2081                 if (err) {
2082                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2083                                fw_name);
2084                         goto out;
2085                 } else {
2086                         new_fw++;
2087                 }
2088         }
2089
2090         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2091         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2092         if (err) {
2093                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2094                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2095                 if (err)
2096                         goto out;
2097                 if (rdev->me_fw->size != me_req_size) {
2098                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2099                                rdev->me_fw->size, fw_name);
2100                         err = -EINVAL;
2101                 }
2102         } else {
2103                 err = radeon_ucode_validate(rdev->me_fw);
2104                 if (err) {
2105                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2106                                fw_name);
2107                         goto out;
2108                 } else {
2109                         new_fw++;
2110                 }
2111         }
2112
2113         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2114         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2115         if (err) {
2116                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2117                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2118                 if (err)
2119                         goto out;
2120                 if (rdev->ce_fw->size != ce_req_size) {
2121                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2122                                rdev->ce_fw->size, fw_name);
2123                         err = -EINVAL;
2124                 }
2125         } else {
2126                 err = radeon_ucode_validate(rdev->ce_fw);
2127                 if (err) {
2128                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2129                                fw_name);
2130                         goto out;
2131                 } else {
2132                         new_fw++;
2133                 }
2134         }
2135
2136         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2137         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2138         if (err) {
2139                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2140                 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2141                 if (err)
2142                         goto out;
2143                 if (rdev->mec_fw->size != mec_req_size) {
2144                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2145                                rdev->mec_fw->size, fw_name);
2146                         err = -EINVAL;
2147                 }
2148         } else {
2149                 err = radeon_ucode_validate(rdev->mec_fw);
2150                 if (err) {
2151                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2152                                fw_name);
2153                         goto out;
2154                 } else {
2155                         new_fw++;
2156                 }
2157         }
2158
2159         if (rdev->family == CHIP_KAVERI) {
2160                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2161                 err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2162                 if (err) {
2163                         goto out;
2164                 } else {
2165                         err = radeon_ucode_validate(rdev->mec2_fw);
2166                         if (err) {
2167                                 goto out;
2168                         } else {
2169                                 new_fw++;
2170                         }
2171                 }
2172         }
2173
2174         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2175         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2176         if (err) {
2177                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2178                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2179                 if (err)
2180                         goto out;
2181                 if (rdev->rlc_fw->size != rlc_req_size) {
2182                         pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2183                                rdev->rlc_fw->size, fw_name);
2184                         err = -EINVAL;
2185                 }
2186         } else {
2187                 err = radeon_ucode_validate(rdev->rlc_fw);
2188                 if (err) {
2189                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2190                                fw_name);
2191                         goto out;
2192                 } else {
2193                         new_fw++;
2194                 }
2195         }
2196
2197         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2198         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2199         if (err) {
2200                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2201                 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2202                 if (err)
2203                         goto out;
2204                 if (rdev->sdma_fw->size != sdma_req_size) {
2205                         pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2206                                rdev->sdma_fw->size, fw_name);
2207                         err = -EINVAL;
2208                 }
2209         } else {
2210                 err = radeon_ucode_validate(rdev->sdma_fw);
2211                 if (err) {
2212                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2213                                fw_name);
2214                         goto out;
2215                 } else {
2216                         new_fw++;
2217                 }
2218         }
2219
2220         /* No SMC, MC ucode on APUs */
2221         if (!(rdev->flags & RADEON_IS_IGP)) {
2222                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2223                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2224                 if (err) {
2225                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2226                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2227                         if (err) {
2228                                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2229                                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2230                                 if (err)
2231                                         goto out;
2232                         }
2233                         if ((rdev->mc_fw->size != mc_req_size) &&
2234                             (rdev->mc_fw->size != mc2_req_size)){
2235                                 pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2236                                        rdev->mc_fw->size, fw_name);
2237                                 err = -EINVAL;
2238                         }
2239                         DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2240                 } else {
2241                         err = radeon_ucode_validate(rdev->mc_fw);
2242                         if (err) {
2243                                 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2244                                        fw_name);
2245                                 goto out;
2246                         } else {
2247                                 new_fw++;
2248                         }
2249                 }
2250
2251                 if (new_smc)
2252                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2253                 else
2254                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2255                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2256                 if (err) {
2257                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2258                         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2259                         if (err) {
2260                                 pr_err("smc: error loading firmware \"%s\"\n",
2261                                        fw_name);
2262                                 release_firmware(rdev->smc_fw);
2263                                 rdev->smc_fw = NULL;
2264                                 err = 0;
2265                         } else if (rdev->smc_fw->size != smc_req_size) {
2266                                 pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2267                                        rdev->smc_fw->size, fw_name);
2268                                 err = -EINVAL;
2269                         }
2270                 } else {
2271                         err = radeon_ucode_validate(rdev->smc_fw);
2272                         if (err) {
2273                                 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2274                                        fw_name);
2275                                 goto out;
2276                         } else {
2277                                 new_fw++;
2278                         }
2279                 }
2280         }
2281
2282         if (new_fw == 0) {
2283                 rdev->new_fw = false;
2284         } else if (new_fw < num_fw) {
2285                 pr_err("ci_fw: mixing new and old firmware!\n");
2286                 err = -EINVAL;
2287         } else {
2288                 rdev->new_fw = true;
2289         }
2290
2291 out:
2292         if (err) {
2293                 if (err != -EINVAL)
2294                         pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2295                                fw_name);
2296                 release_firmware(rdev->pfp_fw);
2297                 rdev->pfp_fw = NULL;
2298                 release_firmware(rdev->me_fw);
2299                 rdev->me_fw = NULL;
2300                 release_firmware(rdev->ce_fw);
2301                 rdev->ce_fw = NULL;
2302                 release_firmware(rdev->mec_fw);
2303                 rdev->mec_fw = NULL;
2304                 release_firmware(rdev->mec2_fw);
2305                 rdev->mec2_fw = NULL;
2306                 release_firmware(rdev->rlc_fw);
2307                 rdev->rlc_fw = NULL;
2308                 release_firmware(rdev->sdma_fw);
2309                 rdev->sdma_fw = NULL;
2310                 release_firmware(rdev->mc_fw);
2311                 rdev->mc_fw = NULL;
2312                 release_firmware(rdev->smc_fw);
2313                 rdev->smc_fw = NULL;
2314         }
2315         return err;
2316 }
2317
2318 /*
2319  * Core functions
2320  */
2321 /**
2322  * cik_tiling_mode_table_init - init the hw tiling table
2323  *
2324  * @rdev: radeon_device pointer
2325  *
2326  * Starting with SI, the tiling setup is done globally in a
2327  * set of 32 tiling modes.  Rather than selecting each set of
2328  * parameters per surface as on older asics, we just select
2329  * which index in the tiling table we want to use, and the
2330  * surface uses those parameters (CIK).
2331  */
2332 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2333 {
2334         u32 *tile = rdev->config.cik.tile_mode_array;
2335         u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2336         const u32 num_tile_mode_states =
2337                         ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2338         const u32 num_secondary_tile_mode_states =
2339                         ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2340         u32 reg_offset, split_equal_to_row_size;
2341         u32 num_pipe_configs;
2342         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2343                 rdev->config.cik.max_shader_engines;
2344
2345         switch (rdev->config.cik.mem_row_size_in_kb) {
2346         case 1:
2347                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2348                 break;
2349         case 2:
2350         default:
2351                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2352                 break;
2353         case 4:
2354                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2355                 break;
2356         }
2357
2358         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2359         if (num_pipe_configs > 8)
2360                 num_pipe_configs = 16;
2361
2362         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2363                 tile[reg_offset] = 0;
2364         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2365                 macrotile[reg_offset] = 0;
2366
2367         switch(num_pipe_configs) {
2368         case 16:
2369                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2370                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2371                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2372                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2373                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2374                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2375                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2377                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2378                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2379                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2380                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2381                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2382                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2383                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2384                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2385                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2386                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2387                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388                            TILE_SPLIT(split_equal_to_row_size));
2389                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2390                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2392                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2393                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2394                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2396                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2397                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2398                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2399                            TILE_SPLIT(split_equal_to_row_size));
2400                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2401                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2402                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2403                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2405                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2406                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2407                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2408                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2409                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2410                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2411                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2412                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2413                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2414                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2415                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2416                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2417                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2418                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2420                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2421                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2422                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2423                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2424                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2425                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2426                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2427                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2428                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2429                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2430                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2432                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2433                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2435                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2436                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2437                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2438                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2439                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2440                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2441                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2442                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2443                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2444                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2445                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2446                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2447
2448                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2450                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2451                            NUM_BANKS(ADDR_SURF_16_BANK));
2452                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2454                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2455                            NUM_BANKS(ADDR_SURF_16_BANK));
2456                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2458                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2459                            NUM_BANKS(ADDR_SURF_16_BANK));
2460                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2462                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2463                            NUM_BANKS(ADDR_SURF_16_BANK));
2464                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2466                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2467                            NUM_BANKS(ADDR_SURF_8_BANK));
2468                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2470                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2471                            NUM_BANKS(ADDR_SURF_4_BANK));
2472                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2474                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2475                            NUM_BANKS(ADDR_SURF_2_BANK));
2476                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2478                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2479                            NUM_BANKS(ADDR_SURF_16_BANK));
2480                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2482                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2483                            NUM_BANKS(ADDR_SURF_16_BANK));
2484                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487                             NUM_BANKS(ADDR_SURF_16_BANK));
2488                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2490                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2491                             NUM_BANKS(ADDR_SURF_8_BANK));
2492                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2494                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2495                             NUM_BANKS(ADDR_SURF_4_BANK));
2496                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2498                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2499                             NUM_BANKS(ADDR_SURF_2_BANK));
2500                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2501                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2502                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2503                             NUM_BANKS(ADDR_SURF_2_BANK));
2504
2505                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2506                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2507                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2508                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2509                 break;
2510
2511         case 8:
2512                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2513                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2514                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2515                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2516                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2517                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2518                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2519                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2520                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2521                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2522                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2523                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2524                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2526                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2527                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2528                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2529                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2530                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531                            TILE_SPLIT(split_equal_to_row_size));
2532                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2533                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2534                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2535                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2536                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2537                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2538                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2539                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2540                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2541                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2542                            TILE_SPLIT(split_equal_to_row_size));
2543                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2544                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2545                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2546                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2548                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2549                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2550                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2552                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2553                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2554                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2555                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2556                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2557                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2558                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2560                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2561                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2562                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2563                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2564                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2565                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2566                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2567                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2568                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2569                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2570                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2571                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2572                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2573                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2574                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2575                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2576                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2577                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2578                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2579                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2580                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2581                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2582                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2583                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2584                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2585                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2586                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2587                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2588                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2589                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2590
2591                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2592                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2593                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2594                                 NUM_BANKS(ADDR_SURF_16_BANK));
2595                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2596                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2597                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2598                                 NUM_BANKS(ADDR_SURF_16_BANK));
2599                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2600                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2601                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2602                                 NUM_BANKS(ADDR_SURF_16_BANK));
2603                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2604                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2605                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2606                                 NUM_BANKS(ADDR_SURF_16_BANK));
2607                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2609                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2610                                 NUM_BANKS(ADDR_SURF_8_BANK));
2611                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2613                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2614                                 NUM_BANKS(ADDR_SURF_4_BANK));
2615                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2617                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2618                                 NUM_BANKS(ADDR_SURF_2_BANK));
2619                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2621                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2622                                 NUM_BANKS(ADDR_SURF_16_BANK));
2623                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2625                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2626                                 NUM_BANKS(ADDR_SURF_16_BANK));
2627                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2629                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2630                                 NUM_BANKS(ADDR_SURF_16_BANK));
2631                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2633                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2634                                 NUM_BANKS(ADDR_SURF_16_BANK));
2635                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2637                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2638                                 NUM_BANKS(ADDR_SURF_8_BANK));
2639                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2641                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2642                                 NUM_BANKS(ADDR_SURF_4_BANK));
2643                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2645                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2646                                 NUM_BANKS(ADDR_SURF_2_BANK));
2647
2648                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2649                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2650                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2651                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2652                 break;
2653
2654         case 4:
2655                 if (num_rbs == 4) {
2656                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2657                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2658                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2659                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2660                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2661                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2662                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2663                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2664                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2665                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2666                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2667                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2668                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2669                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2670                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2671                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2672                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2673                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2674                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675                            TILE_SPLIT(split_equal_to_row_size));
2676                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2677                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2678                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2679                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2680                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2681                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2682                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2683                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2684                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2685                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2686                            TILE_SPLIT(split_equal_to_row_size));
2687                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2688                            PIPE_CONFIG(ADDR_SURF_P4_16x16));
2689                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2690                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2692                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2693                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2694                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2696                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2697                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2698                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2699                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2700                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2701                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2702                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2704                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2705                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2706                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2707                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2708                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2709                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2710                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2711                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2712                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2713                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2714                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2716                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2717                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2718                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2719                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2720                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2722                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2723                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2724                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2726                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2727                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2728                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2729                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2730                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2731                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2732                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2734
2735                 } else if (num_rbs < 4) {
2736                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2737                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2738                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2739                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2740                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2741                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2742                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2743                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2744                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2745                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2746                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2747                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2748                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2749                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2750                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2751                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2752                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2753                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2754                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2755                            TILE_SPLIT(split_equal_to_row_size));
2756                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2757                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2758                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2759                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2760                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2761                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2762                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2763                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2764                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2765                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2766                            TILE_SPLIT(split_equal_to_row_size));
2767                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2768                            PIPE_CONFIG(ADDR_SURF_P4_8x16));
2769                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2770                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2771                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2772                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2773                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2774                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2775                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2776                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2777                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2778                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2779                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2780                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2781                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2782                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2783                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2784                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2785                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2786                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2787                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2788                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2789                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2790                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2791                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2792                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2793                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2794                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2795                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2796                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2797                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2798                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2799                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2800                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2801                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2802                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2803                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2804                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2805                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2806                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2807                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2808                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2809                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2810                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2811                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2812                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2813                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2814                 }
2815
2816                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2817                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2818                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2819                                 NUM_BANKS(ADDR_SURF_16_BANK));
2820                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2821                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2822                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2823                                 NUM_BANKS(ADDR_SURF_16_BANK));
2824                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2825                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2826                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2827                                 NUM_BANKS(ADDR_SURF_16_BANK));
2828                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2830                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2831                                 NUM_BANKS(ADDR_SURF_16_BANK));
2832                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2833                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2834                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2835                                 NUM_BANKS(ADDR_SURF_16_BANK));
2836                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2838                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2839                                 NUM_BANKS(ADDR_SURF_8_BANK));
2840                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2842                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2843                                 NUM_BANKS(ADDR_SURF_4_BANK));
2844                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2845                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2846                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2847                                 NUM_BANKS(ADDR_SURF_16_BANK));
2848                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2849                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2850                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2851                                 NUM_BANKS(ADDR_SURF_16_BANK));
2852                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2854                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2855                                 NUM_BANKS(ADDR_SURF_16_BANK));
2856                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2857                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2858                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2859                                 NUM_BANKS(ADDR_SURF_16_BANK));
2860                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2861                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2862                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2863                                 NUM_BANKS(ADDR_SURF_16_BANK));
2864                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2865                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2866                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2867                                 NUM_BANKS(ADDR_SURF_8_BANK));
2868                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2869                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2870                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2871                                 NUM_BANKS(ADDR_SURF_4_BANK));
2872
2873                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2874                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2875                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2876                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2877                 break;
2878
2879         case 2:
2880                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2881                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2882                            PIPE_CONFIG(ADDR_SURF_P2) |
2883                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2884                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2885                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2886                            PIPE_CONFIG(ADDR_SURF_P2) |
2887                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2888                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2889                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2890                            PIPE_CONFIG(ADDR_SURF_P2) |
2891                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2892                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2893                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2894                            PIPE_CONFIG(ADDR_SURF_P2) |
2895                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2896                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2897                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2898                            PIPE_CONFIG(ADDR_SURF_P2) |
2899                            TILE_SPLIT(split_equal_to_row_size));
2900                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2901                            PIPE_CONFIG(ADDR_SURF_P2) |
2902                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2903                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2904                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2905                            PIPE_CONFIG(ADDR_SURF_P2) |
2906                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2907                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2908                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2909                            PIPE_CONFIG(ADDR_SURF_P2) |
2910                            TILE_SPLIT(split_equal_to_row_size));
2911                 tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2912                            PIPE_CONFIG(ADDR_SURF_P2);
2913                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2914                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2915                            PIPE_CONFIG(ADDR_SURF_P2));
2916                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2917                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2918                             PIPE_CONFIG(ADDR_SURF_P2) |
2919                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2920                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2921                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2922                             PIPE_CONFIG(ADDR_SURF_P2) |
2923                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2924                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2925                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2926                             PIPE_CONFIG(ADDR_SURF_P2) |
2927                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2928                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2929                             PIPE_CONFIG(ADDR_SURF_P2) |
2930                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2931                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2932                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2933                             PIPE_CONFIG(ADDR_SURF_P2) |
2934                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2935                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2936                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2937                             PIPE_CONFIG(ADDR_SURF_P2) |
2938                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2939                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2940                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2941                             PIPE_CONFIG(ADDR_SURF_P2) |
2942                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2943                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2944                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2945                             PIPE_CONFIG(ADDR_SURF_P2));
2946                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2947                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2948                             PIPE_CONFIG(ADDR_SURF_P2) |
2949                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2950                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2951                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2952                             PIPE_CONFIG(ADDR_SURF_P2) |
2953                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2954                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2955                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2956                             PIPE_CONFIG(ADDR_SURF_P2) |
2957                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2958
2959                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2960                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2961                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2962                                 NUM_BANKS(ADDR_SURF_16_BANK));
2963                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2964                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2965                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2966                                 NUM_BANKS(ADDR_SURF_16_BANK));
2967                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2968                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2969                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2970                                 NUM_BANKS(ADDR_SURF_16_BANK));
2971                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2972                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2973                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2974                                 NUM_BANKS(ADDR_SURF_16_BANK));
2975                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2976                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2977                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2978                                 NUM_BANKS(ADDR_SURF_16_BANK));
2979                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2980                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2981                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2982                                 NUM_BANKS(ADDR_SURF_16_BANK));
2983                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2984                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2985                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2986                                 NUM_BANKS(ADDR_SURF_8_BANK));
2987                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2988                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2989                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2990                                 NUM_BANKS(ADDR_SURF_16_BANK));
2991                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2992                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2993                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994                                 NUM_BANKS(ADDR_SURF_16_BANK));
2995                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2996                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2997                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2998                                 NUM_BANKS(ADDR_SURF_16_BANK));
2999                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3000                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3001                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3002                                 NUM_BANKS(ADDR_SURF_16_BANK));
3003                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3004                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3005                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3006                                 NUM_BANKS(ADDR_SURF_16_BANK));
3007                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3008                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3009                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3010                                 NUM_BANKS(ADDR_SURF_16_BANK));
3011                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3012                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3013                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3014                                 NUM_BANKS(ADDR_SURF_8_BANK));
3015
3016                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3017                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3018                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3019                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3020                 break;
3021
3022         default:
3023                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3024         }
3025 }
3026
3027 /**
3028  * cik_select_se_sh - select which SE, SH to address
3029  *
3030  * @rdev: radeon_device pointer
3031  * @se_num: shader engine to address
3032  * @sh_num: sh block to address
3033  *
3034  * Select which SE, SH combinations to address. Certain
3035  * registers are instanced per SE or SH.  0xffffffff means
3036  * broadcast to all SEs or SHs (CIK).
3037  */
3038 static void cik_select_se_sh(struct radeon_device *rdev,
3039                              u32 se_num, u32 sh_num)
3040 {
3041         u32 data = INSTANCE_BROADCAST_WRITES;
3042
3043         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3044                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3045         else if (se_num == 0xffffffff)
3046                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3047         else if (sh_num == 0xffffffff)
3048                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3049         else
3050                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3051         WREG32(GRBM_GFX_INDEX, data);
3052 }
3053
/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * Create a variable length bit mask (CIK): the low @bit_width bits
 * set, all others clear.  A width of 32 or more yields all bits set.
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	/* Closed form instead of a bit-at-a-time loop.  Guard the wide
	 * case explicitly: shifting a 32-bit value by >= 32 is undefined
	 * behavior in C, while the old loop saturated at 0xffffffff.
	 */
	if (bit_width >= 32)
		return 0xffffffff;

	return (1u << bit_width) - 1;
}
3072
3073 /**
3074  * cik_get_rb_disabled - computes the mask of disabled RBs
3075  *
3076  * @rdev: radeon_device pointer
3077  * @max_rb_num: max RBs (render backends) for the asic
3078  * @se_num: number of SEs (shader engines) for the asic
3079  * @sh_per_se: number of SH blocks per SE for the asic
3080  *
3081  * Calculates the bitmask of disabled RBs (CIK).
3082  * Returns the disabled RB bitmask.
3083  */
3084 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3085                               u32 max_rb_num_per_se,
3086                               u32 sh_per_se)
3087 {
3088         u32 data, mask;
3089
3090         data = RREG32(CC_RB_BACKEND_DISABLE);
3091         if (data & 1)
3092                 data &= BACKEND_DISABLE_MASK;
3093         else
3094                 data = 0;
3095         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3096
3097         data >>= BACKEND_DISABLE_SHIFT;
3098
3099         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3100
3101         return data & mask;
3102 }
3103
/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 *
 * Configures per-SE/SH RB registers (CIK).  Gathers the harvested
 * (disabled) RB bitmap, records the enabled-RB mask in rdev->config,
 * and programs PA_SC_RASTER_CONFIG for each SE accordingly.
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Build a chip-wide disabled-RB bitmap by selecting each SE/SH in
	 * turn; Hawaii packs more RB bits per SH than the other CIK parts.
	 */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			if (rdev->family == CHIP_HAWAII)
				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
			else
				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* Restore broadcast addressing before dropping the lock. */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* Invert the disabled bitmap into an enabled-RB mask. */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	rdev->config.cik.backend_enable_mask = enabled_rbs;

	/* Program the raster config per SE, consuming two enabled-RB bits
	 * per SH to choose an RB mapping for that pair.
	 */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 0:
				/* neither RB of this pair is enabled */
				if (j == 0)
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
				else
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
				break;
			case 1:
				/* only the first RB of the pair is enabled */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				/* only the second RB of the pair is enabled */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				/* both RBs enabled */
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	/* Back to broadcast for subsequent register writes. */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);
}
3176
/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* Per-asic topology (SE/pipe/CU/RB counts) and SC fifo sizing. */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAWAII:
		rdev->config.cik.max_shader_engines = 4;
		rdev->config.cik.max_tile_pipes = 16;
		rdev->config.cik.max_cu_per_sh = 11;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 4;
		rdev->config.cik.max_texture_channel_caches = 16;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 4;
		/* Kaveri SKUs ship with different CU/RB harvest configs;
		 * key the counts off the PCI device ID.
		 */
		if ((rdev->pdev->device == 0x1304) ||
		    (rdev->pdev->device == 0x1305) ||
		    (rdev->pdev->device == 0x130C) ||
		    (rdev->pdev->device == 0x130F) ||
		    (rdev->pdev->device == 0x1310) ||
		    (rdev->pdev->device == 0x1311) ||
		    (rdev->pdev->device == 0x131C)) {
			rdev->config.cik.max_cu_per_sh = 8;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1309) ||
			   (rdev->pdev->device == 0x130A) ||
			   (rdev->pdev->device == 0x130D) ||
			   (rdev->pdev->device == 0x1313) ||
			   (rdev->pdev->device == 0x131D)) {
			rdev->config.cik.max_cu_per_sh = 6;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1306) ||
			   (rdev->pdev->device == 0x1307) ||
			   (rdev->pdev->device == 0x130B) ||
			   (rdev->pdev->device == 0x130E) ||
			   (rdev->pdev->device == 0x1315) ||
			   (rdev->pdev->device == 0x1318) ||
			   (rdev->pdev->device == 0x131B)) {
			rdev->config.cik.max_cu_per_sh = 4;
			rdev->config.cik.max_backends_per_se = 1;
		} else {
			rdev->config.cik.max_cu_per_sh = 3;
			rdev->config.cik.max_backends_per_se = 1;
		}
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
	/* ack and re-enable SRBM interrupts */
	WREG32(SRBM_INT_CNTL, 0x1);
	WREG32(SRBM_INT_ACK, 0x1);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read here but not otherwise
	 * used in this function.
	 */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	/* Derive memory row size (in KB, clamped to 4) from the MC
	 * column-count fuses.
	 */
	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	rdev->config.cik.tile_config |=
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* Propagate the address config to every block that consumes it
	 * (HDP, display fetch, both SDMA engines, UVD).
	 */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* Count the active CUs across all SE/SH combinations. */
	rdev->config.cik.active_cus = 0;
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			rdev->config.cik.active_cus +=
				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	mutex_lock(&rdev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	/* read-modify-write the DB/CB debug and control registers so
	 * unrelated bits keep their reset values
	 */
	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* read back and rewrite to latch the host path config */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* let the config settle before the engine is used */
	udelay(50);
}
3459
/*
 * GPU scratch register helper functions.
 */
3463 /**
3464  * cik_scratch_init - setup driver info for CP scratch regs
3465  *
3466  * @rdev: radeon_device pointer
3467  *
3468  * Set up the number and offset of the CP scratch registers.
3469  * NOTE: use of CP scratch registers is a legacy inferface and
3470  * is not used by default on newer asics (r6xx+).  On newer asics,
3471  * memory buffers are used for fences rather than scratch regs.
3472  */
3473 static void cik_scratch_init(struct radeon_device *rdev)
3474 {
3475         int i;
3476
3477         rdev->scratch.num_reg = 7;
3478         rdev->scratch.reg_base = SCRATCH_REG0;
3479         for (i = 0; i < rdev->scratch.num_reg; i++) {
3480                 rdev->scratch.free[i] = true;
3481                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3482         }
3483 }
3484
3485 /**
3486  * cik_ring_test - basic gfx ring test
3487  *
3488  * @rdev: radeon_device pointer
3489  * @ring: radeon_ring structure holding ring information
3490  *
3491  * Allocate a scratch register and write to it using the gfx ring (CIK).
3492  * Provides a basic gfx ring test to verify that the ring is working.
3493  * Used by cik_cp_gfx_resume();
3494  * Returns 0 on success, error on failure.
3495  */
3496 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3497 {
3498         uint32_t scratch;
3499         uint32_t tmp = 0;
3500         unsigned i;
3501         int r;
3502
3503         r = radeon_scratch_get(rdev, &scratch);
3504         if (r) {
3505                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3506                 return r;
3507         }
3508         WREG32(scratch, 0xCAFEDEAD);
3509         r = radeon_ring_lock(rdev, ring, 3);
3510         if (r) {
3511                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3512                 radeon_scratch_free(rdev, scratch);
3513                 return r;
3514         }
3515         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3516         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3517         radeon_ring_write(ring, 0xDEADBEEF);
3518         radeon_ring_unlock_commit(rdev, ring, false);
3519
3520         for (i = 0; i < rdev->usec_timeout; i++) {
3521                 tmp = RREG32(scratch);
3522                 if (tmp == 0xDEADBEEF)
3523                         break;
3524                 DRM_UDELAY(1);
3525         }
3526         if (i < rdev->usec_timeout) {
3527                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3528         } else {
3529                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3530                           ring->idx, scratch, tmp);
3531                 r = -EINVAL;
3532         }
3533         radeon_scratch_free(rdev, scratch);
3534         return r;
3535 }
3536
3537 /**
3538  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3539  *
3540  * @rdev: radeon_device pointer
3541  * @ridx: radeon ring index
3542  *
3543  * Emits an hdp flush on the cp.
3544  */
3545 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3546                                        int ridx)
3547 {
3548         struct radeon_ring *ring = &rdev->ring[ridx];
3549         u32 ref_and_mask;
3550
3551         switch (ring->idx) {
3552         case CAYMAN_RING_TYPE_CP1_INDEX:
3553         case CAYMAN_RING_TYPE_CP2_INDEX:
3554         default:
3555                 switch (ring->me) {
3556                 case 0:
3557                         ref_and_mask = CP2 << ring->pipe;
3558                         break;
3559                 case 1:
3560                         ref_and_mask = CP6 << ring->pipe;
3561                         break;
3562                 default:
3563                         return;
3564                 }
3565                 break;
3566         case RADEON_RING_TYPE_GFX_INDEX:
3567                 ref_and_mask = CP0;
3568                 break;
3569         }
3570
3571         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3572         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3573                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
3574                                  WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3575         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3576         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3577         radeon_ring_write(ring, ref_and_mask);
3578         radeon_ring_write(ring, ref_and_mask);
3579         radeon_ring_write(ring, 0x20); /* poll interval */
3580 }
3581
/**
 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* dummy write: INT_SEL(0) -> no interrupt is raised for it */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	radeon_ring_write(ring, fence->seq - 1);
	radeon_ring_write(ring, 0);

	/* Then send the real EOP event down the pipe. */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* real write carries the actual seq and INT_SEL(2) so an interrupt fires */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3622
/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* note: unlike the gfx EOP packet, RELEASE_MEM takes the
	 * data/int selects before the address dwords */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3650
/**
 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring buffer object
 * @semaphore: radeon semaphore object
 * @emit_wait: Is this a semaphore wait?
 *
 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
 * from running ahead of semaphore waits.
 * Always returns true (emission cannot fail on CIK).
 */
bool cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, lower_32_bits(addr));
	/* upper address bits share a dword with the signal/wait select */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);

	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
		/* Prevent the PFP from running ahead of the semaphore wait */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}

	return true;
}
3682
/**
 * cik_copy_cpdma - copy pages using the CP DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @resv: reservation object to sync to
 *
 * Copy GPU pages using the CP DMA engine (CIK+).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 * Returns the fence for the copy or an ERR_PTR on failure.
 */
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
				    uint64_t src_offset, uint64_t dst_offset,
				    unsigned num_gpu_pages,
				    struct reservation_object *resv)
{
	struct radeon_fence *fence;
	struct radeon_sync sync;
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;
	int i, num_loops;
	int r = 0;

	radeon_sync_create(&sync);

	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	/* a single DMA_DATA packet can move at most 0x1fffff bytes */
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	/* 7 dwords per copy packet plus room for the sync/fence packets */
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		control = 0;
		/* only the final chunk sets CP_SYNC */
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC;
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, &fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_sync_free(rdev, &sync, fence);

	return fence;
}
3754
/*
 * Indirect buffer (IB) submission helpers.
 */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for the reg write below + 4 for the IB packet */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for the WRITE_DATA packet below + 4 for the IB packet */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	control |= ib->length_dw | (vm_id << 24);

	/* the IB packet itself: header, 64-bit GPU address, length/vmid */
	radeon_ring_write(ring, header);
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3809
3810 /**
3811  * cik_ib_test - basic gfx ring IB test
3812  *
3813  * @rdev: radeon_device pointer
3814  * @ring: radeon_ring structure holding ring information
3815  *
3816  * Allocate an IB and execute it on the gfx ring (CIK).
3817  * Provides a basic gfx ring test to verify that IBs are working.
3818  * Returns 0 on success, error on failure.
3819  */
3820 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3821 {
3822         struct radeon_ib ib;
3823         uint32_t scratch;
3824         uint32_t tmp = 0;
3825         unsigned i;
3826         int r;
3827
3828         r = radeon_scratch_get(rdev, &scratch);
3829         if (r) {
3830                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3831                 return r;
3832         }
3833         WREG32(scratch, 0xCAFEDEAD);
3834         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3835         if (r) {
3836                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3837                 radeon_scratch_free(rdev, scratch);
3838                 return r;
3839         }
3840         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3841         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3842         ib.ptr[2] = 0xDEADBEEF;
3843         ib.length_dw = 3;
3844         r = radeon_ib_schedule(rdev, &ib, NULL, false);
3845         if (r) {
3846                 radeon_scratch_free(rdev, scratch);
3847                 radeon_ib_free(rdev, &ib);
3848                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3849                 return r;
3850         }
3851         r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3852                 RADEON_USEC_IB_TEST_TIMEOUT));
3853         if (r < 0) {
3854                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3855                 radeon_scratch_free(rdev, scratch);
3856                 radeon_ib_free(rdev, &ib);
3857                 return r;
3858         } else if (r == 0) {
3859                 DRM_ERROR("radeon: fence wait timed out.\n");
3860                 radeon_scratch_free(rdev, scratch);
3861                 radeon_ib_free(rdev, &ib);
3862                 return -ETIMEDOUT;
3863         }
3864         r = 0;
3865         for (i = 0; i < rdev->usec_timeout; i++) {
3866                 tmp = RREG32(scratch);
3867                 if (tmp == 0xDEADBEEF)
3868                         break;
3869                 DRM_UDELAY(1);
3870         }
3871         if (i < rdev->usec_timeout) {
3872                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3873         } else {
3874                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3875                           scratch, tmp);
3876                 r = -EINVAL;
3877         }
3878         radeon_scratch_free(rdev, scratch);
3879         radeon_ib_free(rdev, &ib);
3880         return r;
3881 }
3882
3883 /*
3884  * CP.
 * On CIK, gfx and compute now have independent command processors.
3886  *
3887  * GFX
3888  * Gfx consists of a single ring and can process both gfx jobs and
3889  * compute jobs.  The gfx CP consists of three microengines (ME):
3890  * PFP - Pre-Fetch Parser
3891  * ME - Micro Engine
3892  * CE - Constant Engine
3893  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3895  * used by the DE so that they can be loaded into cache in parallel
3896  * while the DE is processing state update packets.
3897  *
3898  * Compute
3899  * The compute CP consists of two microengines (ME):
3900  * MEC1 - Compute MicroEngine 1
3901  * MEC2 - Compute MicroEngine 2
3902  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3903  * The queues are exposed to userspace and are programmed directly
3904  * by the compute runtime.
3905  */
3906 /**
3907  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3908  *
3909  * @rdev: radeon_device pointer
3910  * @enable: enable or disable the MEs
3911  *
3912  * Halts or unhalts the gfx MEs.
3913  */
3914 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3915 {
3916         if (enable)
3917                 WREG32(CP_ME_CNTL, 0);
3918         else {
3919                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3920                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3921                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3922                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3923         }
3924         udelay(50);
3925 }
3926
3927 /**
3928  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3929  *
3930  * @rdev: radeon_device pointer
3931  *
3932  * Loads the gfx PFP, ME, and CE ucode.
3933  * Returns 0 for success, -EINVAL if the ucode is not available.
3934  */
3935 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3936 {
3937         int i;
3938
3939         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3940                 return -EINVAL;
3941
3942         cik_cp_gfx_enable(rdev, false);
3943
3944         if (rdev->new_fw) {
3945                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3946                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3947                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3948                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3949                 const struct gfx_firmware_header_v1_0 *me_hdr =
3950                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3951                 const __le32 *fw_data;
3952                 u32 fw_size;
3953
3954                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3955                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3956                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3957
3958                 /* PFP */
3959                 fw_data = (const __le32 *)
3960                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3961                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3962                 WREG32(CP_PFP_UCODE_ADDR, 0);
3963                 for (i = 0; i < fw_size; i++)
3964                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3965                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3966
3967                 /* CE */
3968                 fw_data = (const __le32 *)
3969                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3970                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3971                 WREG32(CP_CE_UCODE_ADDR, 0);
3972                 for (i = 0; i < fw_size; i++)
3973                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3974                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3975
3976                 /* ME */
3977                 fw_data = (const __be32 *)
3978                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3979                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3980                 WREG32(CP_ME_RAM_WADDR, 0);
3981                 for (i = 0; i < fw_size; i++)
3982                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3983                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3984                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3985         } else {
3986                 const __be32 *fw_data;
3987
3988                 /* PFP */
3989                 fw_data = (const __be32 *)rdev->pfp_fw->data;
3990                 WREG32(CP_PFP_UCODE_ADDR, 0);
3991                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3992                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3993                 WREG32(CP_PFP_UCODE_ADDR, 0);
3994
3995                 /* CE */
3996                 fw_data = (const __be32 *)rdev->ce_fw->data;
3997                 WREG32(CP_CE_UCODE_ADDR, 0);
3998                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3999                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4000                 WREG32(CP_CE_UCODE_ADDR, 0);
4001
4002                 /* ME */
4003                 fw_data = (const __be32 *)rdev->me_fw->data;
4004                 WREG32(CP_ME_RAM_WADDR, 0);
4005                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4006                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4007                 WREG32(CP_ME_RAM_WADDR, 0);
4008         }
4009
4010         return 0;
4011 }
4012
/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* clear state buffer plus 17 dwords of setup packets below */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream out the default (clear) state buffer */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}
4073
/**
 * cik_cp_gfx_fini - stop the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx ring and tear down the driver ring
 * info.
 */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	/* halt the MEs before tearing down the ring state */
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
4087
/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8); /* log2 size in qwords */
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	/* re-write CNTL without RB_RPTR_WR_ENA to latch the pointers */
	WREG32(CP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4163
4164 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4165                      struct radeon_ring *ring)
4166 {
4167         u32 rptr;
4168
4169         if (rdev->wb.enabled)
4170                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4171         else
4172                 rptr = RREG32(CP_RB0_RPTR);
4173
4174         return rptr;
4175 }
4176
/**
 * cik_gfx_get_wptr - get the current write pointer of the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring buffer object
 *
 * Reads the wptr directly from the CP_RB0_WPTR register.
 */
u32 cik_gfx_get_wptr(struct radeon_device *rdev,
                     struct radeon_ring *ring)
{
        return RREG32(CP_RB0_WPTR);
}
4182
/**
 * cik_gfx_set_wptr - commit the ring's write pointer to the hardware
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring buffer object
 *
 * Writes ring->wptr to CP_RB0_WPTR; the read-back flushes the
 * posted MMIO write so the CP sees the new pointer.
 */
void cik_gfx_set_wptr(struct radeon_device *rdev,
                      struct radeon_ring *ring)
{
        WREG32(CP_RB0_WPTR, ring->wptr);
        (void)RREG32(CP_RB0_WPTR);
}
4189
4190 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4191                          struct radeon_ring *ring)
4192 {
4193         u32 rptr;
4194
4195         if (rdev->wb.enabled) {
4196                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4197         } else {
4198                 mutex_lock(&rdev->srbm_mutex);
4199                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4200                 rptr = RREG32(CP_HQD_PQ_RPTR);
4201                 cik_srbm_select(rdev, 0, 0, 0, 0);
4202                 mutex_unlock(&rdev->srbm_mutex);
4203         }
4204
4205         return rptr;
4206 }
4207
4208 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4209                          struct radeon_ring *ring)
4210 {
4211         u32 wptr;
4212
4213         if (rdev->wb.enabled) {
4214                 /* XXX check if swapping is necessary on BE */
4215                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4216         } else {
4217                 mutex_lock(&rdev->srbm_mutex);
4218                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4219                 wptr = RREG32(CP_HQD_PQ_WPTR);
4220                 cik_srbm_select(rdev, 0, 0, 0, 0);
4221                 mutex_unlock(&rdev->srbm_mutex);
4222         }
4223
4224         return wptr;
4225 }
4226
/* Publish the new write pointer in the writeback slot, then ring the
 * queue's doorbell so the CP picks it up.
 */
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4234
/* Drain and disable the HQD backing @ring before the MEC is halted.
 * Caller must hold rdev->srbm_mutex (see cik_cp_compute_enable()),
 * since this selects the ring's queue via cik_srbm_select().
 */
static void cik_compute_stop(struct radeon_device *rdev,
                             struct radeon_ring *ring)
{
	u32 j, tmp;

	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
	/* Disable wptr polling. */
	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
	tmp &= ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
	/* Disable HQD. */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		/* request a dequeue and poll (bounded) until the HQD goes idle */
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < rdev->usec_timeout; j++) {
			if (!(RREG32(CP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
}
4259
4260 /**
4261  * cik_cp_compute_enable - enable/disable the compute CP MEs
4262  *
4263  * @rdev: radeon_device pointer
4264  * @enable: enable or disable the MEs
4265  *
4266  * Halts or unhalts the compute MEs.
4267  */
4268 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4269 {
4270         if (enable)
4271                 WREG32(CP_MEC_CNTL, 0);
4272         else {
4273                 /*
4274                  * To make hibernation reliable we need to clear compute ring
4275                  * configuration before halting the compute ring.
4276                  */
4277                 mutex_lock(&rdev->srbm_mutex);
4278                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4279                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4280                 mutex_unlock(&rdev->srbm_mutex);
4281
4282                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4283                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4284                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4285         }
4286         udelay(50);
4287 }
4288
4289 /**
4290  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4291  *
4292  * @rdev: radeon_device pointer
4293  *
4294  * Loads the compute MEC1&2 ucode.
4295  * Returns 0 for success, -EINVAL if the ucode is not available.
4296  */
4297 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4298 {
4299         int i;
4300
4301         if (!rdev->mec_fw)
4302                 return -EINVAL;
4303
4304         cik_cp_compute_enable(rdev, false);
4305
4306         if (rdev->new_fw) {
4307                 const struct gfx_firmware_header_v1_0 *mec_hdr =
4308                         (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4309                 const __le32 *fw_data;
4310                 u32 fw_size;
4311
4312                 radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4313
4314                 /* MEC1 */
4315                 fw_data = (const __le32 *)
4316                         (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4317                 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4318                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4319                 for (i = 0; i < fw_size; i++)
4320                         WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4321                 WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4322
4323                 /* MEC2 */
4324                 if (rdev->family == CHIP_KAVERI) {
4325                         const struct gfx_firmware_header_v1_0 *mec2_hdr =
4326                                 (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4327
4328                         fw_data = (const __le32 *)
4329                                 (rdev->mec2_fw->data +
4330                                  le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4331                         fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4332                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4333                         for (i = 0; i < fw_size; i++)
4334                                 WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4335                         WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4336                 }
4337         } else {
4338                 const __be32 *fw_data;
4339
4340                 /* MEC1 */
4341                 fw_data = (const __be32 *)rdev->mec_fw->data;
4342                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4343                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4344                         WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4345                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4346
4347                 if (rdev->family == CHIP_KAVERI) {
4348                         /* MEC2 */
4349                         fw_data = (const __be32 *)rdev->mec_fw->data;
4350                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4351                         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4352                                 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4353                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4354                 }
4355         }
4356
4357         return 0;
4358 }
4359
4360 /**
4361  * cik_cp_compute_start - start the compute queues
4362  *
4363  * @rdev: radeon_device pointer
4364  *
4365  * Enable the compute queues.
4366  * Returns 0 for success, error for failure.
4367  */
4368 static int cik_cp_compute_start(struct radeon_device *rdev)
4369 {
4370         cik_cp_compute_enable(rdev, true);
4371
4372         return 0;
4373 }
4374
4375 /**
4376  * cik_cp_compute_fini - stop the compute queues
4377  *
4378  * @rdev: radeon_device pointer
4379  *
4380  * Stop the compute queues and tear down the driver queue
4381  * info.
4382  */
4383 static void cik_cp_compute_fini(struct radeon_device *rdev)
4384 {
4385         int i, idx, r;
4386
4387         cik_cp_compute_enable(rdev, false);
4388
4389         for (i = 0; i < 2; i++) {
4390                 if (i == 0)
4391                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4392                 else
4393                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4394
4395                 if (rdev->ring[idx].mqd_obj) {
4396                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4397                         if (unlikely(r != 0))
4398                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4399
4400                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4401                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4402
4403                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4404                         rdev->ring[idx].mqd_obj = NULL;
4405                 }
4406         }
4407 }
4408
4409 static void cik_mec_fini(struct radeon_device *rdev)
4410 {
4411         int r;
4412
4413         if (rdev->mec.hpd_eop_obj) {
4414                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4415                 if (unlikely(r != 0))
4416                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4417                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4418                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4419
4420                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4421                 rdev->mec.hpd_eop_obj = NULL;
4422         }
4423 }
4424
4425 #define MEC_HPD_SIZE 2048
4426
/* Allocate, pin and clear the GTT buffer holding the HPD EOP areas for
 * the MEC pipes managed by this driver.
 * Returns 0 on success, negative error code on failure (the buffer is
 * torn down via cik_mec_fini() on any partial failure).
 */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 * Nonetheless, we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 1;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		/* NOTE(review): sized 2x MEC_HPD_SIZE per pipe, matching the
		 * (i * MEC_HPD_SIZE * 2) stride used in cik_cp_compute_resume();
		 * presumably deliberate headroom - confirm before changing.
		 */
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
4481
/* Per-queue CP_MQD_* / CP_HQD_* register values as stored inside the
 * MQD (see struct bonaire_mqd below); these fields are mirrored into
 * the hardware registers in cik_cp_compute_resume().
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4520
/* Memory Queue Descriptor (MQD) layout for CIK compute queues.  One
 * instance lives in a per-ring GTT BO (allocated in
 * cik_cp_compute_resume()); queue_state holds the values written to
 * the CP_HQD_*/CP_MQD_* registers when the queue is programmed.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4548
4549 /**
4550  * cik_cp_compute_resume - setup the compute queue registers
4551  *
4552  * @rdev: radeon_device pointer
4553  *
4554  * Program the compute queues and test them to make sure they
4555  * are working.
4556  * Returns 0 for success, error for failure.
4557  */
4558 static int cik_cp_compute_resume(struct radeon_device *rdev)
4559 {
4560         int r, i, j, idx;
4561         u32 tmp;
4562         bool use_doorbell = true;
4563         u64 hqd_gpu_addr;
4564         u64 mqd_gpu_addr;
4565         u64 eop_gpu_addr;
4566         u64 wb_gpu_addr;
4567         u32 *buf;
4568         struct bonaire_mqd *mqd;
4569
4570         r = cik_cp_compute_start(rdev);
4571         if (r)
4572                 return r;
4573
4574         /* fix up chicken bits */
4575         tmp = RREG32(CP_CPF_DEBUG);
4576         tmp |= (1 << 23);
4577         WREG32(CP_CPF_DEBUG, tmp);
4578
4579         /* init the pipes */
4580         mutex_lock(&rdev->srbm_mutex);
4581
4582         for (i = 0; i < rdev->mec.num_pipe; ++i) {
4583                 cik_srbm_select(rdev, 0, i, 0, 0);
4584
4585                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
4586                 /* write the EOP addr */
4587                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4588                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4589
4590                 /* set the VMID assigned */
4591                 WREG32(CP_HPD_EOP_VMID, 0);
4592
4593                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4594                 tmp = RREG32(CP_HPD_EOP_CONTROL);
4595                 tmp &= ~EOP_SIZE_MASK;
4596                 tmp |= order_base_2(MEC_HPD_SIZE / 8);
4597                 WREG32(CP_HPD_EOP_CONTROL, tmp);
4598
4599         }
4600         mutex_unlock(&rdev->srbm_mutex);
4601
4602         /* init the queues.  Just two for now. */
4603         for (i = 0; i < 2; i++) {
4604                 if (i == 0)
4605                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4606                 else
4607                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4608
4609                 if (rdev->ring[idx].mqd_obj == NULL) {
4610                         r = radeon_bo_create(rdev,
4611                                              sizeof(struct bonaire_mqd),
4612                                              PAGE_SIZE, true,
4613                                              RADEON_GEM_DOMAIN_GTT, 0, NULL,
4614                                              NULL, &rdev->ring[idx].mqd_obj);
4615                         if (r) {
4616                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4617                                 return r;
4618                         }
4619                 }
4620
4621                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4622                 if (unlikely(r != 0)) {
4623                         cik_cp_compute_fini(rdev);
4624                         return r;
4625                 }
4626                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4627                                   &mqd_gpu_addr);
4628                 if (r) {
4629                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4630                         cik_cp_compute_fini(rdev);
4631                         return r;
4632                 }
4633                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4634                 if (r) {
4635                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4636                         cik_cp_compute_fini(rdev);
4637                         return r;
4638                 }
4639
4640                 /* init the mqd struct */
4641                 memset(buf, 0, sizeof(struct bonaire_mqd));
4642
4643                 mqd = (struct bonaire_mqd *)buf;
4644                 mqd->header = 0xC0310800;
4645                 mqd->static_thread_mgmt01[0] = 0xffffffff;
4646                 mqd->static_thread_mgmt01[1] = 0xffffffff;
4647                 mqd->static_thread_mgmt23[0] = 0xffffffff;
4648                 mqd->static_thread_mgmt23[1] = 0xffffffff;
4649
4650                 mutex_lock(&rdev->srbm_mutex);
4651                 cik_srbm_select(rdev, rdev->ring[idx].me,
4652                                 rdev->ring[idx].pipe,
4653                                 rdev->ring[idx].queue, 0);
4654
4655                 /* disable wptr polling */
4656                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4657                 tmp &= ~WPTR_POLL_EN;
4658                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4659
4660                 /* enable doorbell? */
4661                 mqd->queue_state.cp_hqd_pq_doorbell_control =
4662                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4663                 if (use_doorbell)
4664                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4665                 else
4666                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4667                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4668                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4669
4670                 /* disable the queue if it's active */
4671                 mqd->queue_state.cp_hqd_dequeue_request = 0;
4672                 mqd->queue_state.cp_hqd_pq_rptr = 0;
4673                 mqd->queue_state.cp_hqd_pq_wptr= 0;
4674                 if (RREG32(CP_HQD_ACTIVE) & 1) {
4675                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4676                         for (j = 0; j < rdev->usec_timeout; j++) {
4677                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4678                                         break;
4679                                 udelay(1);
4680                         }
4681                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4682                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4683                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4684                 }
4685
4686                 /* set the pointer to the MQD */
4687                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4688                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4689                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4690                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4691                 /* set MQD vmid to 0 */
4692                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4693                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4694                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4695
4696                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4697                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4698                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4699                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4700                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4701                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4702
4703                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4704                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4705                 mqd->queue_state.cp_hqd_pq_control &=
4706                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4707
4708                 mqd->queue_state.cp_hqd_pq_control |=
4709                         order_base_2(rdev->ring[idx].ring_size / 8);
4710                 mqd->queue_state.cp_hqd_pq_control |=
4711                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4712 #ifdef __BIG_ENDIAN
4713                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4714 #endif
4715                 mqd->queue_state.cp_hqd_pq_control &=
4716                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4717                 mqd->queue_state.cp_hqd_pq_control |=
4718                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4719                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4720
4721                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4722                 if (i == 0)
4723                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4724                 else
4725                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4726                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4727                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4728                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4729                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4730                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4731
4732                 /* set the wb address wether it's enabled or not */
4733                 if (i == 0)
4734                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4735                 else
4736                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4737                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4738                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4739                         upper_32_bits(wb_gpu_addr) & 0xffff;
4740                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4741                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4742                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4743                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4744
4745                 /* enable the doorbell if requested */
4746                 if (use_doorbell) {
4747                         mqd->queue_state.cp_hqd_pq_doorbell_control =
4748                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4749                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4750                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
4751                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4752                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4753                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
4754                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
4755
4756                 } else {
4757                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4758                 }
4759                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4760                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4761
4762                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4763                 rdev->ring[idx].wptr = 0;
4764                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4765                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4766                 mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4767
4768                 /* set the vmid for the queue */
4769                 mqd->queue_state.cp_hqd_vmid = 0;
4770                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4771
4772                 /* activate the queue */
4773                 mqd->queue_state.cp_hqd_active = 1;
4774                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4775
4776                 cik_srbm_select(rdev, 0, 0, 0, 0);
4777                 mutex_unlock(&rdev->srbm_mutex);
4778
4779                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4780                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4781
4782                 rdev->ring[idx].ready = true;
4783                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4784                 if (r)
4785                         rdev->ring[idx].ready = false;
4786         }
4787
4788         return 0;
4789 }
4790
/* Enable or halt both the gfx and compute command processors. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4796
/* Load the gfx CP ucode, then the compute MEC ucode.
 * Returns 0 on success or the first loader's error code.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r == 0)
		r = cik_cp_compute_load_microcode(rdev);

	return r;
}
4810
/* Tear down the gfx CP state, then the compute CP state. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4816
/* Load the CP microcode and bring up the gfx and compute rings.
 * GUI idle interrupts are masked for the duration of the bring-up and
 * re-enabled once both resume paths succeed.
 * Returns 0 on success, negative error code on the first failure.
 */
static int cik_cp_resume(struct radeon_device *rdev)
{
	int r;

	cik_enable_gui_idle_interrupt(rdev, false);

	r = cik_cp_load_microcode(rdev);
	if (r)
		return r;

	r = cik_cp_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_cp_compute_resume(rdev);
	if (r)
		return r;

	cik_enable_gui_idle_interrupt(rdev, true);

	return 0;
}
4838
/* Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log; used
 * by the soft-reset path to aid hang diagnosis.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4878
4879 /**
4880  * cik_gpu_check_soft_reset - check which blocks are busy
4881  *
4882  * @rdev: radeon_device pointer
4883  *
4884  * Check which blocks are busy and return the relevant reset
4885  * mask to be used by cik_gpu_soft_reset().
4886  * Returns a mask of the blocks to be reset.
4887  */
4888 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4889 {
4890         u32 reset_mask = 0;
4891         u32 tmp;
4892
4893         /* GRBM_STATUS */
4894         tmp = RREG32(GRBM_STATUS);
4895         if (tmp & (PA_BUSY | SC_BUSY |
4896                    BCI_BUSY | SX_BUSY |
4897                    TA_BUSY | VGT_BUSY |
4898                    DB_BUSY | CB_BUSY |
4899                    GDS_BUSY | SPI_BUSY |
4900                    IA_BUSY | IA_BUSY_NO_DMA))
4901                 reset_mask |= RADEON_RESET_GFX;
4902
4903         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4904                 reset_mask |= RADEON_RESET_CP;
4905
4906         /* GRBM_STATUS2 */
4907         tmp = RREG32(GRBM_STATUS2);
4908         if (tmp & RLC_BUSY)
4909                 reset_mask |= RADEON_RESET_RLC;
4910
4911         /* SDMA0_STATUS_REG */
4912         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4913         if (!(tmp & SDMA_IDLE))
4914                 reset_mask |= RADEON_RESET_DMA;
4915
4916         /* SDMA1_STATUS_REG */
4917         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4918         if (!(tmp & SDMA_IDLE))
4919                 reset_mask |= RADEON_RESET_DMA1;
4920
4921         /* SRBM_STATUS2 */
4922         tmp = RREG32(SRBM_STATUS2);
4923         if (tmp & SDMA_BUSY)
4924                 reset_mask |= RADEON_RESET_DMA;
4925
4926         if (tmp & SDMA1_BUSY)
4927                 reset_mask |= RADEON_RESET_DMA1;
4928
4929         /* SRBM_STATUS */
4930         tmp = RREG32(SRBM_STATUS);
4931
4932         if (tmp & IH_BUSY)
4933                 reset_mask |= RADEON_RESET_IH;
4934
4935         if (tmp & SEM_BUSY)
4936                 reset_mask |= RADEON_RESET_SEM;
4937
4938         if (tmp & GRBM_RQ_PENDING)
4939                 reset_mask |= RADEON_RESET_GRBM;
4940
4941         if (tmp & VMC_BUSY)
4942                 reset_mask |= RADEON_RESET_VMC;
4943
4944         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4945                    MCC_BUSY | MCD_BUSY))
4946                 reset_mask |= RADEON_RESET_MC;
4947
4948         if (evergreen_is_display_hung(rdev))
4949                 reset_mask |= RADEON_RESET_DISPLAY;
4950
4951         /* Skip MC reset as it's mostly likely not hung, just busy */
4952         if (reset_mask & RADEON_RESET_MC) {
4953                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4954                 reset_mask &= ~RADEON_RESET_MC;
4955         }
4956
4957         return reset_mask;
4958 }
4959
4960 /**
4961  * cik_gpu_soft_reset - soft reset GPU
4962  *
4963  * @rdev: radeon_device pointer
4964  * @reset_mask: mask of which blocks to reset
4965  *
4966  * Soft reset the blocks specified in @reset_mask.
4967  */
4968 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4969 {
4970         struct evergreen_mc_save save;
4971         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4972         u32 tmp;
4973
4974         if (reset_mask == 0)
4975                 return;
4976
4977         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4978
4979         cik_print_gpu_status_regs(rdev);
4980         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4981                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4982         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4983                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4984
4985         /* disable CG/PG */
4986         cik_fini_pg(rdev);
4987         cik_fini_cg(rdev);
4988
4989         /* stop the rlc */
4990         cik_rlc_stop(rdev);
4991
4992         /* Disable GFX parsing/prefetching */
4993         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4994
4995         /* Disable MEC parsing/prefetching */
4996         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4997
4998         if (reset_mask & RADEON_RESET_DMA) {
4999                 /* sdma0 */
5000                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5001                 tmp |= SDMA_HALT;
5002                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5003         }
5004         if (reset_mask & RADEON_RESET_DMA1) {
5005                 /* sdma1 */
5006                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5007                 tmp |= SDMA_HALT;
5008                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5009         }
5010
5011         evergreen_mc_stop(rdev, &save);
5012         if (evergreen_mc_wait_for_idle(rdev)) {
5013                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5014         }
5015
5016         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5017                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5018
5019         if (reset_mask & RADEON_RESET_CP) {
5020                 grbm_soft_reset |= SOFT_RESET_CP;
5021
5022                 srbm_soft_reset |= SOFT_RESET_GRBM;
5023         }
5024
5025         if (reset_mask & RADEON_RESET_DMA)
5026                 srbm_soft_reset |= SOFT_RESET_SDMA;
5027
5028         if (reset_mask & RADEON_RESET_DMA1)
5029                 srbm_soft_reset |= SOFT_RESET_SDMA1;
5030
5031         if (reset_mask & RADEON_RESET_DISPLAY)
5032                 srbm_soft_reset |= SOFT_RESET_DC;
5033
5034         if (reset_mask & RADEON_RESET_RLC)
5035                 grbm_soft_reset |= SOFT_RESET_RLC;
5036
5037         if (reset_mask & RADEON_RESET_SEM)
5038                 srbm_soft_reset |= SOFT_RESET_SEM;
5039
5040         if (reset_mask & RADEON_RESET_IH)
5041                 srbm_soft_reset |= SOFT_RESET_IH;
5042
5043         if (reset_mask & RADEON_RESET_GRBM)
5044                 srbm_soft_reset |= SOFT_RESET_GRBM;
5045
5046         if (reset_mask & RADEON_RESET_VMC)
5047                 srbm_soft_reset |= SOFT_RESET_VMC;
5048
5049         if (!(rdev->flags & RADEON_IS_IGP)) {
5050                 if (reset_mask & RADEON_RESET_MC)
5051                         srbm_soft_reset |= SOFT_RESET_MC;
5052         }
5053
5054         if (grbm_soft_reset) {
5055                 tmp = RREG32(GRBM_SOFT_RESET);
5056                 tmp |= grbm_soft_reset;
5057                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5058                 WREG32(GRBM_SOFT_RESET, tmp);
5059                 tmp = RREG32(GRBM_SOFT_RESET);
5060
5061                 udelay(50);
5062
5063                 tmp &= ~grbm_soft_reset;
5064                 WREG32(GRBM_SOFT_RESET, tmp);
5065                 tmp = RREG32(GRBM_SOFT_RESET);
5066         }
5067
5068         if (srbm_soft_reset) {
5069                 tmp = RREG32(SRBM_SOFT_RESET);
5070                 tmp |= srbm_soft_reset;
5071                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5072                 WREG32(SRBM_SOFT_RESET, tmp);
5073                 tmp = RREG32(SRBM_SOFT_RESET);
5074
5075                 udelay(50);
5076
5077                 tmp &= ~srbm_soft_reset;
5078                 WREG32(SRBM_SOFT_RESET, tmp);
5079                 tmp = RREG32(SRBM_SOFT_RESET);
5080         }
5081
5082         /* Wait a little for things to settle down */
5083         udelay(50);
5084
5085         evergreen_mc_resume(rdev, &save);
5086         udelay(50);
5087
5088         cik_print_gpu_status_regs(rdev);
5089 }
5090
/* GMCON register state saved across a pci config reset on IGPs
 * (see kv_save_regs_for_reset()/kv_restore_regs_for_reset()).
 */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;	/* GMCON_RENG_EXECUTE */
	u32 gmcon_misc;		/* GMCON_MISC */
	u32 gmcon_misc3;	/* GMCON_MISC3 */
};
5096
/* Save the GMCON registers into @save before a pci config reset and
 * disable the register-engine auto-execute and stutter features while
 * the reset runs; kv_restore_regs_for_reset() restores them afterwards.
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	/* keep the register engine from re-running on power-up or on
	 * register update, and turn off stutter, for the duration */
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5108
5109 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5110                                       struct kv_reset_save_regs *save)
5111 {
5112         int i;
5113
5114         WREG32(GMCON_PGFSM_WRITE, 0);
5115         WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5116
5117         for (i = 0; i < 5; i++)
5118                 WREG32(GMCON_PGFSM_WRITE, 0);
5119
5120         WREG32(GMCON_PGFSM_WRITE, 0);
5121         WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5122
5123         for (i = 0; i < 5; i++)
5124                 WREG32(GMCON_PGFSM_WRITE, 0);
5125
5126         WREG32(GMCON_PGFSM_WRITE, 0x210000);
5127         WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5128
5129         for (i = 0; i < 5; i++)
5130                 WREG32(GMCON_PGFSM_WRITE, 0);
5131
5132         WREG32(GMCON_PGFSM_WRITE, 0x21003);
5133         WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5134
5135         for (i = 0; i < 5; i++)
5136                 WREG32(GMCON_PGFSM_WRITE, 0);
5137
5138         WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5139         WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5140
5141         for (i = 0; i < 5; i++)
5142                 WREG32(GMCON_PGFSM_WRITE, 0);
5143
5144         WREG32(GMCON_PGFSM_WRITE, 0);
5145         WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5146
5147         for (i = 0; i < 5; i++)
5148                 WREG32(GMCON_PGFSM_WRITE, 0);
5149
5150         WREG32(GMCON_PGFSM_WRITE, 0x420000);
5151         WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5152
5153         for (i = 0; i < 5; i++)
5154                 WREG32(GMCON_PGFSM_WRITE, 0);
5155
5156         WREG32(GMCON_PGFSM_WRITE, 0x120202);
5157         WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5158
5159         for (i = 0; i < 5; i++)
5160                 WREG32(GMCON_PGFSM_WRITE, 0);
5161
5162         WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5163         WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5164
5165         for (i = 0; i < 5; i++)
5166                 WREG32(GMCON_PGFSM_WRITE, 0);
5167
5168         WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5169         WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5170
5171         for (i = 0; i < 5; i++)
5172                 WREG32(GMCON_PGFSM_WRITE, 0);
5173
5174         WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5175         WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5176
5177         WREG32(GMCON_MISC3, save->gmcon_misc3);
5178         WREG32(GMCON_MISC, save->gmcon_misc);
5179         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5180 }
5181
/* Full-ASIC reset via the pci config space reset mechanism; used when a
 * per-block soft reset is not enough.  On IGPs the GMCON state is saved
 * and restored around the reset.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* save GMCON state that the reset will clobber */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset: CONFIG_MEMSIZE reads back
	 * as all 1s until the device responds to register reads again */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5244
5245 /**
5246  * cik_asic_reset - soft reset GPU
5247  *
5248  * @rdev: radeon_device pointer
5249  * @hard: force hard reset
5250  *
5251  * Look up which blocks are hung and attempt
5252  * to reset them.
5253  * Returns 0 for success.
5254  */
5255 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5256 {
5257         u32 reset_mask;
5258
5259         if (hard) {
5260                 cik_gpu_pci_config_reset(rdev);
5261                 return 0;
5262         }
5263
5264         reset_mask = cik_gpu_check_soft_reset(rdev);
5265
5266         if (reset_mask)
5267                 r600_set_bios_scratch_engine_hung(rdev, true);
5268
5269         /* try soft reset */
5270         cik_gpu_soft_reset(rdev, reset_mask);
5271
5272         reset_mask = cik_gpu_check_soft_reset(rdev);
5273
5274         /* try pci config reset */
5275         if (reset_mask && radeon_hard_reset)
5276                 cik_gpu_pci_config_reset(rdev);
5277
5278         reset_mask = cik_gpu_check_soft_reset(rdev);
5279
5280         if (!reset_mask)
5281                 r600_set_bios_scratch_engine_hung(rdev, false);
5282
5283         return 0;
5284 }
5285
5286 /**
5287  * cik_gfx_is_lockup - check if the 3D engine is locked up
5288  *
5289  * @rdev: radeon_device pointer
5290  * @ring: radeon_ring structure holding ring information
5291  *
5292  * Check if the 3D engine is locked up (CIK).
5293  * Returns true if the engine is locked, false if not.
5294  */
5295 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5296 {
5297         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5298
5299         if (!(reset_mask & (RADEON_RESET_GFX |
5300                             RADEON_RESET_COMPUTE |
5301                             RADEON_RESET_CP))) {
5302                 radeon_ring_lockup_update(rdev, ring);
5303                 return false;
5304         }
5305         return radeon_ring_test_lockup(rdev, ring);
5306 }
5307
5308 /* MC */
5309 /**
5310  * cik_mc_program - program the GPU memory controller
5311  *
5312  * @rdev: radeon_device pointer
5313  *
5314  * Set the location of vram, gart, and AGP in the GPU's
5315  * physical address space (CIK).
5316  */
5317 static void cik_mc_program(struct radeon_device *rdev)
5318 {
5319         struct evergreen_mc_save save;
5320         u32 tmp;
5321         int i, j;
5322
5323         /* Initialize HDP */
5324         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5325                 WREG32((0x2c14 + j), 0x00000000);
5326                 WREG32((0x2c18 + j), 0x00000000);
5327                 WREG32((0x2c1c + j), 0x00000000);
5328                 WREG32((0x2c20 + j), 0x00000000);
5329                 WREG32((0x2c24 + j), 0x00000000);
5330         }
5331         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5332
5333         evergreen_mc_stop(rdev, &save);
5334         if (radeon_mc_wait_for_idle(rdev)) {
5335                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5336         }
5337         /* Lockout access through VGA aperture*/
5338         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5339         /* Update configuration */
5340         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5341                rdev->mc.vram_start >> 12);
5342         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5343                rdev->mc.vram_end >> 12);
5344         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5345                rdev->vram_scratch.gpu_addr >> 12);
5346         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5347         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5348         WREG32(MC_VM_FB_LOCATION, tmp);
5349         /* XXX double check these! */
5350         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5351         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5352         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5353         WREG32(MC_VM_AGP_BASE, 0);
5354         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5355         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5356         if (radeon_mc_wait_for_idle(rdev)) {
5357                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5358         }
5359         evergreen_mc_resume(rdev, &save);
5360         /* we need to own VRAM, so turn off the VGA renderer here
5361          * to stop it overwriting our objects */
5362         rv515_vga_render_disable(rdev);
5363 }
5364
5365 /**
5366  * cik_mc_init - initialize the memory controller driver params
5367  *
5368  * @rdev: radeon_device pointer
5369  *
5370  * Look up the amount of vram, vram width, and decide how to place
5371  * vram and gart within the GPU's physical address space (CIK).
5372  * Returns 0 for success.
5373  */
5374 static int cik_mc_init(struct radeon_device *rdev)
5375 {
5376         u32 tmp;
5377         int chansize, numchan;
5378
5379         /* Get VRAM informations */
5380         rdev->mc.vram_is_ddr = true;
5381         tmp = RREG32(MC_ARB_RAMCFG);
5382         if (tmp & CHANSIZE_MASK) {
5383                 chansize = 64;
5384         } else {
5385                 chansize = 32;
5386         }
5387         tmp = RREG32(MC_SHARED_CHMAP);
5388         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5389         case 0:
5390         default:
5391                 numchan = 1;
5392                 break;
5393         case 1:
5394                 numchan = 2;
5395                 break;
5396         case 2:
5397                 numchan = 4;
5398                 break;
5399         case 3:
5400                 numchan = 8;
5401                 break;
5402         case 4:
5403                 numchan = 3;
5404                 break;
5405         case 5:
5406                 numchan = 6;
5407                 break;
5408         case 6:
5409                 numchan = 10;
5410                 break;
5411         case 7:
5412                 numchan = 12;
5413                 break;
5414         case 8:
5415                 numchan = 16;
5416                 break;
5417         }
5418         rdev->mc.vram_width = numchan * chansize;
5419         /* Could aper size report 0 ? */
5420         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5421         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5422         /* size in MB on si */
5423         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5424         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5425         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5426         si_vram_gtt_location(rdev, &rdev->mc);
5427         radeon_update_bandwidth_info(rdev);
5428
5429         return 0;
5430 }
5431
5432 /*
5433  * GART
5434  * VMID 0 is the physical GPU addresses as used by the kernel.
5435  * VMIDs 1-15 are used for userspace clients and are handled
5436  * by the radeon vm/hsa code.
5437  */
5438 /**
5439  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5440  *
5441  * @rdev: radeon_device pointer
5442  *
5443  * Flush the TLB for the VMID 0 page table (CIK).
5444  */
5445 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5446 {
5447         /* flush hdp cache */
5448         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5449
5450         /* bits 0-15 are the VM contexts0-15 */
5451         WREG32(VM_INVALIDATE_REQUEST, 0x1);
5452 }
5453
/* Program the per-VMID SH_MEM aperture registers for VMIDs 8-15, which
 * are used by compute clients (see the vm/hsa comment above).  Each of
 * these VMIDs gets bases at 0x6000 and an unaligned-access, non-cached
 * SH_MEM_CONFIG, in contrast to the gfx defaults that
 * cik_pcie_gart_enable() programs for all 16 VMIDs first.
 */
static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
{
	int i;
	uint32_t sh_mem_bases, sh_mem_config;

	/* both 16-bit base fields set to 0x6000 */
	sh_mem_bases = 0x6000 | 0x6000 << 16;
	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);

	mutex_lock(&rdev->srbm_mutex);
	for (i = 8; i < 16; i++) {
		/* select the VMID whose registers are being programmed */
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, sh_mem_config);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, sh_mem_bases);
	}
	/* switch back to VMID 0 */
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);
}
5475
5476 /**
5477  * cik_pcie_gart_enable - gart enable
5478  *
5479  * @rdev: radeon_device pointer
5480  *
5481  * This sets up the TLBs, programs the page tables for VMID0,
5482  * sets up the hw for VMIDs 1-15 which are allocated on
5483  * demand, and sets up the global locations for the LDS, GDS,
5484  * and GPUVM for FSA64 clients (CIK).
5485  * Returns 0 for success, errors for failure.
5486  */
5487 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5488 {
5489         int r, i;
5490
5491         if (rdev->gart.robj == NULL) {
5492                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5493                 return -EINVAL;
5494         }
5495         r = radeon_gart_table_vram_pin(rdev);
5496         if (r)
5497                 return r;
5498         /* Setup TLB control */
5499         WREG32(MC_VM_MX_L1_TLB_CNTL,
5500                (0xA << 7) |
5501                ENABLE_L1_TLB |
5502                ENABLE_L1_FRAGMENT_PROCESSING |
5503                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5504                ENABLE_ADVANCED_DRIVER_MODEL |
5505                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5506         /* Setup L2 cache */
5507         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5508                ENABLE_L2_FRAGMENT_PROCESSING |
5509                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5510                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5511                EFFECTIVE_L2_QUEUE_SIZE(7) |
5512                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5513         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5514         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5515                BANK_SELECT(4) |
5516                L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5517         /* setup context0 */
5518         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5519         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5520         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5521         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5522                         (u32)(rdev->dummy_page.addr >> 12));
5523         WREG32(VM_CONTEXT0_CNTL2, 0);
5524         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5525                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5526
5527         WREG32(0x15D4, 0);
5528         WREG32(0x15D8, 0);
5529         WREG32(0x15DC, 0);
5530
5531         /* restore context1-15 */
5532         /* set vm size, must be a multiple of 4 */
5533         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5534         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5535         for (i = 1; i < 16; i++) {
5536                 if (i < 8)
5537                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5538                                rdev->vm_manager.saved_table_addr[i]);
5539                 else
5540                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5541                                rdev->vm_manager.saved_table_addr[i]);
5542         }
5543
5544         /* enable context1-15 */
5545         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5546                (u32)(rdev->dummy_page.addr >> 12));
5547         WREG32(VM_CONTEXT1_CNTL2, 4);
5548         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5549                                 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5550                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5551                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5552                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5553                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5554                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5555                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5556                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5557                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5558                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5559                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5560                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5561                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5562
5563         if (rdev->family == CHIP_KAVERI) {
5564                 u32 tmp = RREG32(CHUB_CONTROL);
5565                 tmp &= ~BYPASS_VM;
5566                 WREG32(CHUB_CONTROL, tmp);
5567         }
5568
5569         /* XXX SH_MEM regs */
5570         /* where to put LDS, scratch, GPUVM in FSA64 space */
5571         mutex_lock(&rdev->srbm_mutex);
5572         for (i = 0; i < 16; i++) {
5573                 cik_srbm_select(rdev, 0, 0, 0, i);
5574                 /* CP and shaders */
5575                 WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
5576                 WREG32(SH_MEM_APE1_BASE, 1);
5577                 WREG32(SH_MEM_APE1_LIMIT, 0);
5578                 WREG32(SH_MEM_BASES, 0);
5579                 /* SDMA GFX */
5580                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5581                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5582                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5583                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5584                 /* XXX SDMA RLC - todo */
5585         }
5586         cik_srbm_select(rdev, 0, 0, 0, 0);
5587         mutex_unlock(&rdev->srbm_mutex);
5588
5589         cik_pcie_init_compute_vmid(rdev);
5590
5591         cik_pcie_gart_tlb_flush(rdev);
5592         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5593                  (unsigned)(rdev->mc.gtt_size >> 20),
5594                  (unsigned long long)rdev->gart.table_addr);
5595         rdev->gart.ready = true;
5596         return 0;
5597 }
5598
5599 /**
5600  * cik_pcie_gart_disable - gart disable
5601  *
5602  * @rdev: radeon_device pointer
5603  *
5604  * This disables all VM page table (CIK).
5605  */
5606 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5607 {
5608         unsigned i;
5609
5610         for (i = 1; i < 16; ++i) {
5611                 uint32_t reg;
5612                 if (i < 8)
5613                         reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5614                 else
5615                         reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5616                 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5617         }
5618
5619         /* Disable all tables */
5620         WREG32(VM_CONTEXT0_CNTL, 0);
5621         WREG32(VM_CONTEXT1_CNTL, 0);
5622         /* Setup TLB control */
5623         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5624                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5625         /* Setup L2 cache */
5626         WREG32(VM_L2_CNTL,
5627                ENABLE_L2_FRAGMENT_PROCESSING |
5628                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5629                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5630                EFFECTIVE_L2_QUEUE_SIZE(7) |
5631                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5632         WREG32(VM_L2_CNTL2, 0);
5633         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5634                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5635         radeon_gart_table_vram_unpin(rdev);
5636 }
5637
5638 /**
5639  * cik_pcie_gart_fini - vm fini callback
5640  *
5641  * @rdev: radeon_device pointer
5642  *
5643  * Tears down the driver GART/VM setup (CIK).
5644  */
5645 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5646 {
5647         cik_pcie_gart_disable(rdev);
5648         radeon_gart_table_vram_free(rdev);
5649         radeon_gart_fini(rdev);
5650 }
5651
5652 /* vm parser */
5653 /**
5654  * cik_ib_parse - vm ib_parse callback
5655  *
5656  * @rdev: radeon_device pointer
5657  * @ib: indirect buffer pointer
5658  *
5659  * CIK uses hw IB checking so this is a nop (CIK).
5660  */
5661 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5662 {
5663         return 0;
5664 }
5665
5666 /*
5667  * vm
5668  * VMID 0 is the physical GPU addresses as used by the kernel.
5669  * VMIDs 1-15 are used for userspace clients and are handled
5670  * by the radeon vm/hsa code.
5671  */
5672 /**
5673  * cik_vm_init - cik vm init callback
5674  *
5675  * @rdev: radeon_device pointer
5676  *
5677  * Inits cik specific vm parameters (number of VMs, base of vram for
5678  * VMIDs 1-15) (CIK).
5679  * Returns 0 for success.
5680  */
5681 int cik_vm_init(struct radeon_device *rdev)
5682 {
5683         /*
5684          * number of VMs
5685          * VMID 0 is reserved for System
5686          * radeon graphics/compute will use VMIDs 1-15
5687          */
5688         rdev->vm_manager.nvm = 16;
5689         /* base offset of vram pages */
5690         if (rdev->flags & RADEON_IS_IGP) {
5691                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5692                 tmp <<= 22;
5693                 rdev->vm_manager.vram_base_offset = tmp;
5694         } else
5695                 rdev->vm_manager.vram_base_offset = 0;
5696
5697         return 0;
5698 }
5699
5700 /**
5701  * cik_vm_fini - cik vm fini callback
5702  *
5703  * @rdev: radeon_device pointer
5704  *
5705  * Tear down any asic specific VM setup (CIK).
5706  */
5707 void cik_vm_fini(struct radeon_device *rdev)
5708 {
5709 }
5710
5711 /**
5712  * cik_vm_decode_fault - print human readable fault info
5713  *
5714  * @rdev: radeon_device pointer
5715  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5716  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5717  *
5718  * Print human readable fault information (CIK).
5719  */
5720 static void cik_vm_decode_fault(struct radeon_device *rdev,
5721                                 u32 status, u32 addr, u32 mc_client)
5722 {
5723         u32 mc_id;
5724         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5725         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5726         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5727                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5728
5729         if (rdev->family == CHIP_HAWAII)
5730                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5731         else
5732                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5733
5734         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5735                protections, vmid, addr,
5736                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5737                block, mc_client, mc_id);
5738 }
5739
5740 /**
5741  * cik_vm_flush - cik vm flush using the CP
5742  *
5743  * @rdev: radeon_device pointer
5744  *
5745  * Update the page table base and flush the VM TLB
5746  * using the CP (CIK).
5747  */
5748 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5749                   unsigned vm_id, uint64_t pd_addr)
5750 {
5751         int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5752
5753         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5754         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5755                                  WRITE_DATA_DST_SEL(0)));
5756         if (vm_id < 8) {
5757                 radeon_ring_write(ring,
5758                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5759         } else {
5760                 radeon_ring_write(ring,
5761                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5762         }
5763         radeon_ring_write(ring, 0);
5764         radeon_ring_write(ring, pd_addr >> 12);
5765
5766         /* update SH_MEM_* regs */
5767         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5768         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5769                                  WRITE_DATA_DST_SEL(0)));
5770         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5771         radeon_ring_write(ring, 0);
5772         radeon_ring_write(ring, VMID(vm_id));
5773
5774         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5775         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5776                                  WRITE_DATA_DST_SEL(0)));
5777         radeon_ring_write(ring, SH_MEM_BASES >> 2);
5778         radeon_ring_write(ring, 0);
5779
5780         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5781         radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
5782         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5783         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5784
5785         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5786         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5787                                  WRITE_DATA_DST_SEL(0)));
5788         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5789         radeon_ring_write(ring, 0);
5790         radeon_ring_write(ring, VMID(0));
5791
5792         /* HDP flush */
5793         cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5794
5795         /* bits 0-15 are the VM contexts0-15 */
5796         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5797         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5798                                  WRITE_DATA_DST_SEL(0)));
5799         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5800         radeon_ring_write(ring, 0);
5801         radeon_ring_write(ring, 1 << vm_id);
5802
5803         /* wait for the invalidate to complete */
5804         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5805         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5806                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
5807                                  WAIT_REG_MEM_ENGINE(0))); /* me */
5808         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5809         radeon_ring_write(ring, 0);
5810         radeon_ring_write(ring, 0); /* ref */
5811         radeon_ring_write(ring, 0); /* mask */
5812         radeon_ring_write(ring, 0x20); /* poll interval */
5813
5814         /* compute doesn't have PFP */
5815         if (usepfp) {
5816                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5817                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5818                 radeon_ring_write(ring, 0x0);
5819         }
5820 }
5821
5822 /*
5823  * RLC
5824  * The RLC is a multi-purpose microengine that handles a
5825  * variety of functions, the most important of which is
5826  * the interrupt controller.
5827  */
5828 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5829                                           bool enable)
5830 {
5831         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5832
5833         if (enable)
5834                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5835         else
5836                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5837         WREG32(CP_INT_CNTL_RING0, tmp);
5838 }
5839
5840 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5841 {
5842         u32 tmp;
5843
5844         tmp = RREG32(RLC_LB_CNTL);
5845         if (enable)
5846                 tmp |= LOAD_BALANCE_ENABLE;
5847         else
5848                 tmp &= ~LOAD_BALANCE_ENABLE;
5849         WREG32(RLC_LB_CNTL, tmp);
5850 }
5851
/* Poll until the RLC serdes units report idle.
 *
 * Walks every SE/SH pair (under the GRBM index mutex) waiting for the
 * per-CU serdes master busy register to clear, then waits globally for
 * the non-CU (SE/GC/TC) masters.  Each wait is bounded by usec_timeout;
 * a timeout falls through silently without reporting an error.
 */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast selection before dropping the mutex */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
5878
5879 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5880 {
5881         u32 tmp;
5882
5883         tmp = RREG32(RLC_CNTL);
5884         if (tmp != rlc)
5885                 WREG32(RLC_CNTL, rlc);
5886 }
5887
/* Disable the RLC if it is currently running and wait for it to idle.
 *
 * Returns the original RLC_CNTL value so the caller can later restore
 * it with cik_update_rlc().
 */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* bounded wait for the RLC GPM block to go idle */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
5911
/* Ask the RLC to enter safe mode and wait for the handshake.
 *
 * Writes the ENTER message (with REQ set) to RLC_GPR_REG2, polls
 * RLC_GPM_STAT until both the power and clock status bits are set, then
 * waits for the RLC to clear REQ, acknowledging the message.  Both
 * waits are bounded by usec_timeout.
 */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to ack the request (REQ cleared) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
5932
5933 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5934 {
5935         u32 tmp;
5936
5937         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5938         WREG32(RLC_GPR_REG2, tmp);
5939 }
5940
/**
 * cik_rlc_stop - stop the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Halt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	/* mask the gui idle interrupts while the RLC is halted */
	cik_enable_gui_idle_interrupt(rdev, false);

	/* make sure the serdes units have quiesced before returning */
	cik_wait_for_rlc_serdes(rdev);
}
5956
/**
 * cik_rlc_start - start the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Unhalt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);

	/* brief settle delay after re-enabling the RLC */
	udelay(50);
}
5972
/**
 * cik_rlc_resume - setup the RLC hw
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the RLC registers, load the ucode,
 * and start the RLC (CIK).
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_rlc_resume(struct radeon_device *rdev)
{
	u32 i, size, tmp;

	if (!rdev->rlc_fw)
		return -EINVAL;

	cik_rlc_stop(rdev);

	/* disable CG */
	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(RLC_CGCG_CGLS_CTRL, tmp);

	si_rlc_reset(rdev);

	cik_init_pg(rdev);

	cik_init_cg(rdev);

	/* load-balance counter init/max values */
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_CNTR_MAX, 0x00008000);

	/* program load-balance params with broadcast SE/SH selection */
	mutex_lock(&rdev->grbm_idx_mutex);
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(RLC_LB_PARAMS, 0x00600408);
	WREG32(RLC_LB_CNTL, 0x80000004);
	mutex_unlock(&rdev->grbm_idx_mutex);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new-style firmware: little-endian payload whose offset and
		 * size come from the firmware header */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		const __le32 *fw_data = (const __le32 *)
			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
	} else {
		/* legacy firmware: big-endian blob with a fixed per-family
		 * dword count */
		const __be32 *fw_data;

		switch (rdev->family) {
		case CHIP_BONAIRE:
		case CHIP_HAWAII:
		default:
			size = BONAIRE_RLC_UCODE_SIZE;
			break;
		case CHIP_KAVERI:
			size = KV_RLC_UCODE_SIZE;
			break;
		case CHIP_KABINI:
			size = KB_RLC_UCODE_SIZE;
			break;
		case CHIP_MULLINS:
			size = ML_RLC_UCODE_SIZE;
			break;
		}

		fw_data = (const __be32 *)rdev->rlc_fw->data;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, 0);
	}

	/* XXX - find out what chips support lbpw */
	cik_enable_lbpw(rdev, false);

	if (rdev->family == CHIP_BONAIRE)
		WREG32(RLC_DRIVER_DMA_STATUS, 0);

	cik_rlc_start(rdev);

	return 0;
}
6064
/* Enable/disable GFX coarse grain clock gating (CGCG) and coarse grain
 * light sleep (CGLS).
 *
 * Enabling halts the RLC, programs the serdes write machinery for all
 * SEs/SHs (under the GRBM index mutex), restores the RLC, and finally
 * sets CGCG_EN | CGLS_EN in RLC_CGCG_CGLS_CTRL.  Disabling clears those
 * bits after masking the gui idle interrupts.
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);
		mutex_unlock(&rdev->grbm_idx_mutex);

		/* restore the RLC state saved by cik_halt_rlc() */
		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* back-to-back dummy reads — presumably needed to settle the
		 * CB clock-gating state before disabling; confirm against the
		 * hw programming docs before touching */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6102
/* Enable/disable GFX medium grain clock gating (MGCG) plus the related
 * CP/RLC memory light sleep and CGTS static gating.
 *
 * Both paths halt the RLC, program the serdes write machinery for all
 * SEs/SHs with an MGCG override value (under the GRBM index mutex), and
 * then restore the RLC state saved by cik_halt_rlc().
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		/* restore the RLC state saved above */
		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* take RLC memory out of light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		/* take CP memory out of light sleep */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);
	}
}
6186
/* MC/ATC/VM registers that carry per-block clock-gating and light-sleep
 * enable bits; iterated by cik_enable_mc_ls() and cik_enable_mc_mgcg().
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6199
6200 static void cik_enable_mc_ls(struct radeon_device *rdev,
6201                              bool enable)
6202 {
6203         int i;
6204         u32 orig, data;
6205
6206         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6207                 orig = data = RREG32(mc_cg_registers[i]);
6208                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6209                         data |= MC_LS_ENABLE;
6210                 else
6211                         data &= ~MC_LS_ENABLE;
6212                 if (data != orig)
6213                         WREG32(mc_cg_registers[i], data);
6214         }
6215 }
6216
6217 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6218                                bool enable)
6219 {
6220         int i;
6221         u32 orig, data;
6222
6223         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6224                 orig = data = RREG32(mc_cg_registers[i]);
6225                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6226                         data |= MC_CG_ENABLE;
6227                 else
6228                         data &= ~MC_CG_ENABLE;
6229                 if (data != orig)
6230                         WREG32(mc_cg_registers[i], data);
6231         }
6232 }
6233
6234 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6235                                  bool enable)
6236 {
6237         u32 orig, data;
6238
6239         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6240                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6241                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6242         } else {
6243                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6244                 data |= 0xff000000;
6245                 if (data != orig)
6246                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6247
6248                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6249                 data |= 0xff000000;
6250                 if (data != orig)
6251                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6252         }
6253 }
6254
6255 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6256                                  bool enable)
6257 {
6258         u32 orig, data;
6259
6260         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6261                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6262                 data |= 0x100;
6263                 if (orig != data)
6264                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6265
6266                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6267                 data |= 0x100;
6268                 if (orig != data)
6269                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6270         } else {
6271                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6272                 data &= ~0x100;
6273                 if (orig != data)
6274                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6275
6276                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6277                 data &= ~0x100;
6278                 if (orig != data)
6279                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6280         }
6281 }
6282
/* Enable/disable UVD medium grain clock gating via UVD_CGC_MEM_CTRL
 * (indirect UVD context register) and the DCM bit of UVD_CGC_CTRL.
 */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		/* NOTE(review): the value read here is discarded by the next
		 * assignment; the disable path does "data &= ~0xfff", so this
		 * may have been intended as "data |= 0xfff" — confirm against
		 * the UVD programming docs before changing. */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6308
6309 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6310                                bool enable)
6311 {
6312         u32 orig, data;
6313
6314         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6315
6316         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6317                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6318                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6319         else
6320                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6321                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6322
6323         if (orig != data)
6324                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6325 }
6326
6327 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6328                                 bool enable)
6329 {
6330         u32 orig, data;
6331
6332         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6333
6334         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6335                 data &= ~CLOCK_GATING_DIS;
6336         else
6337                 data |= CLOCK_GATING_DIS;
6338
6339         if (orig != data)
6340                 WREG32(HDP_HOST_PATH_CNTL, data);
6341 }
6342
6343 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6344                               bool enable)
6345 {
6346         u32 orig, data;
6347
6348         orig = data = RREG32(HDP_MEM_POWER_LS);
6349
6350         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6351                 data |= HDP_LS_ENABLE;
6352         else
6353                 data &= ~HDP_LS_ENABLE;
6354
6355         if (orig != data)
6356                 WREG32(HDP_MEM_POWER_LS, data);
6357 }
6358
/* Enable or disable clock gating for the IP blocks selected in @block.
 *
 * For GFX the MGCG/CGCG ordering matters: MGCG before CGCG on enable
 * and the reverse on disable, with the gui idle interrupts masked
 * around the sequence.  MC gating is skipped on IGPs and UVD gating is
 * skipped when the asic has no UVD block.
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6406
/* Enable clock gating at init time: GFX first, then (after UVD internal
 * CG setup where present) MC/SDMA/BIF/UVD/HDP together.
 */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6421
/* Disable clock gating in the reverse order of cik_init_cg():
 * non-GFX blocks first, GFX last.
 */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6432
6433 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6434                                           bool enable)
6435 {
6436         u32 data, orig;
6437
6438         orig = data = RREG32(RLC_PG_CNTL);
6439         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6440                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6441         else
6442                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6443         if (orig != data)
6444                 WREG32(RLC_PG_CNTL, data);
6445 }
6446
6447 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6448                                           bool enable)
6449 {
6450         u32 data, orig;
6451
6452         orig = data = RREG32(RLC_PG_CNTL);
6453         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6454                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6455         else
6456                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6457         if (orig != data)
6458                 WREG32(RLC_PG_CNTL, data);
6459 }
6460
6461 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6462 {
6463         u32 data, orig;
6464
6465         orig = data = RREG32(RLC_PG_CNTL);
6466         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6467                 data &= ~DISABLE_CP_PG;
6468         else
6469                 data |= DISABLE_CP_PG;
6470         if (orig != data)
6471                 WREG32(RLC_PG_CNTL, data);
6472 }
6473
6474 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6475 {
6476         u32 data, orig;
6477
6478         orig = data = RREG32(RLC_PG_CNTL);
6479         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6480                 data &= ~DISABLE_GDS_PG;
6481         else
6482                 data |= DISABLE_GDS_PG;
6483         if (orig != data)
6484                 WREG32(RLC_PG_CNTL, data);
6485 }
6486
6487 #define CP_ME_TABLE_SIZE    96
6488 #define CP_ME_TABLE_OFFSET  2048
6489 #define CP_MEC_TABLE_OFFSET 4096
6490
/* Copy the per-ME jump tables from the CP firmware images into the RLC
 * cp_table buffer used for CP powergating.
 *
 * me index: 0 = CE, 1 = PFP, 2 = ME, 3 = MEC, 4 = MEC2 (KAVERI only).
 * New-style firmware carries the table offset/size in its header;
 * legacy firmware uses the fixed CP_*_TABLE_* constants above.
 * Bails out silently if the cp_table BO has not been mapped.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (rdev->new_fw) {
			/* new-style firmware: jt_offset/jt_size come from the
			 * per-image gfx firmware header */
			const __le32 *fw_data;
			const struct gfx_firmware_header_v1_0 *hdr;

			if (me == 0) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
				fw_data = (const __le32 *)
					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 1) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
				fw_data = (const __le32 *)
					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 2) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
				fw_data = (const __le32 *)
					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 3) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		} else {
			/* legacy firmware: big-endian blob with a fixed table
			 * offset and size */
			const __be32 *fw_data;
			table_size = CP_ME_TABLE_SIZE;

			if (me == 0) {
				fw_data = (const __be32 *)rdev->ce_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 1) {
				fw_data = (const __be32 *)rdev->pfp_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 2) {
				fw_data = (const __be32 *)rdev->me_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else {
				fw_data = (const __be32 *)rdev->mec_fw->data;
				table_offset = CP_MEC_TABLE_OFFSET;
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		}
	}
}
6574
/* Enable/disable GFX powergating (GFX_PG_ENABLE in RLC_PG_CNTL) along
 * with RLC automatic powergating (AUTO_PG_EN in RLC_AUTO_PG_CTRL).
 */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* dummy read; the value is discarded — presumably forces the
		 * disable to take effect before returning (TODO confirm) */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6604
6605 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6606 {
6607         u32 mask = 0, tmp, tmp1;
6608         int i;
6609
6610         mutex_lock(&rdev->grbm_idx_mutex);
6611         cik_select_se_sh(rdev, se, sh);
6612         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6613         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6614         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6615         mutex_unlock(&rdev->grbm_idx_mutex);
6616
6617         tmp &= 0xffff0000;
6618
6619         tmp |= tmp1;
6620         tmp >>= 16;
6621
6622         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6623                 mask <<= 1;
6624                 mask |= 1;
6625         }
6626
6627         return (~tmp) & mask;
6628 }
6629
6630 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6631 {
6632         u32 i, j, k, active_cu_number = 0;
6633         u32 mask, counter, cu_bitmap;
6634         u32 tmp = 0;
6635
6636         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6637                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6638                         mask = 1;
6639                         cu_bitmap = 0;
6640                         counter = 0;
6641                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6642                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6643                                         if (counter < 2)
6644                                                 cu_bitmap |= mask;
6645                                         counter ++;
6646                                 }
6647                                 mask <<= 1;
6648                         }
6649
6650                         active_cu_number += counter;
6651                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6652                 }
6653         }
6654
6655         WREG32(RLC_PG_AO_CU_MASK, tmp);
6656
6657         tmp = RREG32(RLC_MAX_PG_CU);
6658         tmp &= ~MAX_PU_CU_MASK;
6659         tmp |= MAX_PU_CU(active_cu_number);
6660         WREG32(RLC_MAX_PG_CU, tmp);
6661 }
6662
6663 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6664                                        bool enable)
6665 {
6666         u32 data, orig;
6667
6668         orig = data = RREG32(RLC_PG_CNTL);
6669         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6670                 data |= STATIC_PER_CU_PG_ENABLE;
6671         else
6672                 data &= ~STATIC_PER_CU_PG_ENABLE;
6673         if (orig != data)
6674                 WREG32(RLC_PG_CNTL, data);
6675 }
6676
6677 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6678                                         bool enable)
6679 {
6680         u32 data, orig;
6681
6682         orig = data = RREG32(RLC_PG_CNTL);
6683         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6684                 data |= DYN_PER_CU_PG_ENABLE;
6685         else
6686                 data &= ~DYN_PER_CU_PG_ENABLE;
6687         if (orig != data)
6688                 WREG32(RLC_PG_CNTL, data);
6689 }
6690
6691 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6692 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6693
/**
 * cik_init_gfx_cgpg - initialize RLC state for gfx powergating
 *
 * @rdev: radeon_device pointer
 *
 * Programs the RLC scratch area with the clear state descriptor and the
 * save/restore register list, points the RLC at the save/restore and CP
 * table buffers, and sets up the powergating delay/idle thresholds.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
        u32 data, orig;
        u32 i;

        if (rdev->rlc.cs_data) {
                /* write the clear state descriptor (addr hi, addr lo, size)
                 * into the RLC scratch area */
                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
                WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
                WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
                WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
        } else {
                /* no clear state data - zero out the descriptor */
                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
                for (i = 0; i < 3; i++)
                        WREG32(RLC_GPM_SCRATCH_DATA, 0);
        }
        if (rdev->rlc.reg_list) {
                /* upload the save/restore register list */
                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
                for (i = 0; i < rdev->rlc.reg_list_size; i++)
                        WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
        }

        /* set GFX_PG_SRC in RLC_PG_CNTL (write only on change) */
        orig = data = RREG32(RLC_PG_CNTL);
        data |= GFX_PG_SRC;
        if (orig != data)
                WREG32(RLC_PG_CNTL, data);

        /* point the RLC at the save/restore and CP table buffers (256B units) */
        WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
        WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

        /* set the CP ring buffer wptr idle poll count */
        data = RREG32(CP_RB_WPTR_POLL_CNTL);
        data &= ~IDLE_POLL_COUNT_MASK;
        data |= IDLE_POLL_COUNT(0x60);
        WREG32(CP_RB_WPTR_POLL_CNTL, data);

        data = 0x10101010;
        WREG32(RLC_PG_DELAY, data);

        /* only the low byte of RLC_PG_DELAY_2 is reprogrammed */
        data = RREG32(RLC_PG_DELAY_2);
        data &= ~0xff;
        data |= 0x3;
        WREG32(RLC_PG_DELAY_2, data);

        /* GRBM register save/restore idle threshold */
        data = RREG32(RLC_AUTO_PG_CTRL);
        data &= ~GRBM_REG_SGIT_MASK;
        data |= GRBM_REG_SGIT(0x700);
        WREG32(RLC_AUTO_PG_CTRL, data);

}
6742
/**
 * cik_update_gfx_pg - enable/disable all gfx powergating features
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable powergating
 *
 * Toggles coarse grain, static per-CU and dynamic per-CU gfx
 * powergating.  Each helper checks its own pg_flags support bit.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
        cik_enable_gfx_cgpg(rdev, enable);
        cik_enable_gfx_static_mgpg(rdev, enable);
        cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6749
6750 u32 cik_get_csb_size(struct radeon_device *rdev)
6751 {
6752         u32 count = 0;
6753         const struct cs_section_def *sect = NULL;
6754         const struct cs_extent_def *ext = NULL;
6755
6756         if (rdev->rlc.cs_data == NULL)
6757                 return 0;
6758
6759         /* begin clear state */
6760         count += 2;
6761         /* context control state */
6762         count += 3;
6763
6764         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6765                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6766                         if (sect->id == SECT_CONTEXT)
6767                                 count += 2 + ext->reg_count;
6768                         else
6769                                 return 0;
6770                 }
6771         }
6772         /* pa_sc_raster_config/pa_sc_raster_config1 */
6773         count += 4;
6774         /* end clear state */
6775         count += 2;
6776         /* clear state */
6777         count += 2;
6778
6779         return count;
6780 }
6781
/**
 * cik_get_csb_buffer - fill a buffer with the clear state indirect buffer
 *
 * @rdev: radeon_device pointer
 * @buffer: destination buffer (little-endian dwords)
 *
 * Emits the PACKET3 stream that loads the golden context register state:
 * preamble begin, context control, the SECT_CONTEXT register extents from
 * rlc.cs_data, the asic specific raster config, preamble end and a
 * CLEAR_STATE packet.  The layout must match cik_get_csb_size().
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
        u32 count = 0, i;
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;

        if (rdev->rlc.cs_data == NULL)
                return;
        if (buffer == NULL)
                return;

        /* begin clear state */
        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        /* context control */
        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        buffer[count++] = cpu_to_le32(0x80000000);
        buffer[count++] = cpu_to_le32(0x80000000);

        /* golden register extents; only SECT_CONTEXT sections are valid,
         * anything else aborts (cik_get_csb_size() returns 0 for those) */
        for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                buffer[count++] =
                                        cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
                                buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
                                for (i = 0; i < ext->reg_count; i++)
                                        buffer[count++] = cpu_to_le32(ext->extent[i]);
                        } else {
                                return;
                        }
                }
        }

        /* asic specific pa_sc_raster_config/pa_sc_raster_config1 */
        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
        switch (rdev->family) {
        case CHIP_BONAIRE:
                buffer[count++] = cpu_to_le32(0x16000012);
                buffer[count++] = cpu_to_le32(0x00000000);
                break;
        case CHIP_KAVERI:
                buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
                buffer[count++] = cpu_to_le32(0x00000000);
                break;
        case CHIP_KABINI:
        case CHIP_MULLINS:
                buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
                buffer[count++] = cpu_to_le32(0x00000000);
                break;
        case CHIP_HAWAII:
                buffer[count++] = cpu_to_le32(0x3a00161a);
                buffer[count++] = cpu_to_le32(0x0000002e);
                break;
        default:
                buffer[count++] = cpu_to_le32(0x00000000);
                buffer[count++] = cpu_to_le32(0x00000000);
                break;
        }

        /* end clear state */
        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

        /* clear state */
        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
        buffer[count++] = cpu_to_le32(0);
}
6846
/**
 * cik_init_pg - initialize powergating
 *
 * @rdev: radeon_device pointer
 *
 * If any pg_flags are set: enables SCK slowdown on power up/down, sets
 * up the RLC gfx PG state plus CP and GDS powergating when GFX_PG is
 * supported, programs the always-on CU mask, then enables gfx PG.
 * Teardown is mirrored in cik_fini_pg().
 */
static void cik_init_pg(struct radeon_device *rdev)
{
        if (rdev->pg_flags) {
                cik_enable_sck_slowdown_on_pu(rdev, true);
                cik_enable_sck_slowdown_on_pd(rdev, true);
                if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
                        /* RLC state must be set up before gfx PG is enabled below */
                        cik_init_gfx_cgpg(rdev);
                        cik_enable_cp_pg(rdev, true);
                        cik_enable_gds_pg(rdev, true);
                }
                cik_init_ao_cu_mask(rdev);
                cik_update_gfx_pg(rdev, true);
        }
}
6861
/**
 * cik_fini_pg - tear down powergating
 *
 * @rdev: radeon_device pointer
 *
 * Disables gfx powergating first, then CP and GDS powergating when
 * GFX_PG is supported.  Reverse of cik_init_pg().
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
        if (rdev->pg_flags) {
                cik_update_gfx_pg(rdev, false);
                if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
                        cik_enable_cp_pg(rdev, false);
                        cik_enable_gds_pg(rdev, false);
                }
        }
}
6872
6873 /*
6874  * Interrupts
6875  * Starting with r6xx, interrupts are handled via a ring buffer.
6876  * Ring buffers are areas of GPU accessible memory that the GPU
6877  * writes interrupt vectors into and the host reads vectors out of.
6878  * There is a rptr (read pointer) that determines where the
6879  * host is currently reading, and a wptr (write pointer)
6880  * which determines where the GPU has written.  When the
6881  * pointers are equal, the ring is idle.  When the GPU
6882  * writes vectors to the ring buffer, it increments the
6883  * wptr.  When there is an interrupt, the host then starts
6884  * fetching commands and processing them until the pointers are
6885  * equal again at which point it updates the rptr.
6886  */
6887
6888 /**
6889  * cik_enable_interrupts - Enable the interrupt ring buffer
6890  *
6891  * @rdev: radeon_device pointer
6892  *
6893  * Enable the interrupt ring buffer (CIK).
6894  */
6895 static void cik_enable_interrupts(struct radeon_device *rdev)
6896 {
6897         u32 ih_cntl = RREG32(IH_CNTL);
6898         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6899
6900         ih_cntl |= ENABLE_INTR;
6901         ih_rb_cntl |= IH_RB_ENABLE;
6902         WREG32(IH_CNTL, ih_cntl);
6903         WREG32(IH_RB_CNTL, ih_rb_cntl);
6904         rdev->ih.enabled = true;
6905 }
6906
6907 /**
6908  * cik_disable_interrupts - Disable the interrupt ring buffer
6909  *
6910  * @rdev: radeon_device pointer
6911  *
6912  * Disable the interrupt ring buffer (CIK).
6913  */
6914 static void cik_disable_interrupts(struct radeon_device *rdev)
6915 {
6916         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6917         u32 ih_cntl = RREG32(IH_CNTL);
6918
6919         ih_rb_cntl &= ~IH_RB_ENABLE;
6920         ih_cntl &= ~ENABLE_INTR;
6921         WREG32(IH_RB_CNTL, ih_rb_cntl);
6922         WREG32(IH_CNTL, ih_cntl);
6923         /* set rptr, wptr to 0 */
6924         WREG32(IH_RB_RPTR, 0);
6925         WREG32(IH_RB_WPTR, 0);
6926         rdev->ih.enabled = false;
6927         rdev->ih.rptr = 0;
6928 }
6929
6930 /**
6931  * cik_disable_interrupt_state - Disable all interrupt sources
6932  *
6933  * @rdev: radeon_device pointer
6934  *
6935  * Clear all interrupt enable bits used by the driver (CIK).
6936  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
        u32 tmp;

        /* gfx ring: keep only the context busy/empty bits, clearing all
         * other interrupt enables */
        tmp = RREG32(CP_INT_CNTL_RING0) &
                (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
        WREG32(CP_INT_CNTL_RING0, tmp);
        /* sdma */
        tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
        WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
        tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
        WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
        /* compute queues */
        WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
        WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
        WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
        WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
        WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
        WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
        WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
        WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
        /* grbm */
        WREG32(GRBM_INT_CNTL, 0);
        /* SRBM */
        WREG32(SRBM_INT_CNTL, 0);
        /* vline/vblank, etc. */
        WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
        WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
        if (rdev->num_crtc >= 4) {
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
        }
        if (rdev->num_crtc >= 6) {
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
        }
        /* pflip */
        if (rdev->num_crtc >= 2) {
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
        }
        if (rdev->num_crtc >= 4) {
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
        }
        if (rdev->num_crtc >= 6) {
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
        }

        /* dac hotplug */
        WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

        /* digital hotplug: preserve the sense polarity bit while clearing
         * the interrupt enables */
        tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD1_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD2_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD3_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD4_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD5_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD6_INT_CONTROL, tmp);

}
7006
7007 /**
7008  * cik_irq_init - init and enable the interrupt ring
7009  *
7010  * @rdev: radeon_device pointer
7011  *
7012  * Allocate a ring buffer for the interrupt controller,
7013  * enable the RLC, disable interrupts, enable the IH
7014  * ring buffer and enable it (CIK).
 * Called at device load and resume.
7016  * Returns 0 for success, errors for failure.
7017  */
static int cik_irq_init(struct radeon_device *rdev)
{
        int ret = 0;
        int rb_bufsz;
        u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

        /* allocate ring */
        ret = r600_ih_ring_alloc(rdev);
        if (ret)
                return ret;

        /* disable irqs */
        cik_disable_interrupts(rdev);

        /* init rlc */
        ret = cik_rlc_resume(rdev);
        if (ret) {
                r600_ih_ring_fini(rdev);
                return ret;
        }

        /* setup interrupt control */
        /* XXX this should actually be a bus address, not an MC address. same on older asics */
        WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
        interrupt_cntl = RREG32(INTERRUPT_CNTL);
        /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
         * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
         */
        interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
        /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
        interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
        WREG32(INTERRUPT_CNTL, interrupt_cntl);

        WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
        /* rb_bufsz is log2 of the ring size in dwords */
        rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

        ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
                      IH_WPTR_OVERFLOW_CLEAR |
                      (rb_bufsz << 1));

        if (rdev->wb.enabled)
                ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

        /* set the writeback address whether it's enabled or not */
        WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
        WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

        WREG32(IH_RB_CNTL, ih_rb_cntl);

        /* set rptr, wptr to 0 */
        WREG32(IH_RB_RPTR, 0);
        WREG32(IH_RB_WPTR, 0);

        /* Default settings for IH_CNTL (disabled at first) */
        ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
        /* RPTR_REARM only works if msi's are enabled */
        if (rdev->msi_enabled)
                ih_cntl |= RPTR_REARM;
        WREG32(IH_CNTL, ih_cntl);

        /* force the active interrupt state to all disabled */
        cik_disable_interrupt_state(rdev);

        /* enable PCI bus mastering before turning interrupts on */
        pci_set_master(rdev->pdev);

        /* enable irqs */
        cik_enable_interrupts(rdev);

        return ret;
}
7088
7089 /**
7090  * cik_irq_set - enable/disable interrupt sources
7091  *
7092  * @rdev: radeon_device pointer
7093  *
7094  * Enable interrupt sources on the GPU (vblanks, hpd,
7095  * etc.) (CIK).
7096  * Returns 0 for success, errors for failure.
7097  */
7098 int cik_irq_set(struct radeon_device *rdev)
7099 {
7100         u32 cp_int_cntl;
7101         u32 cp_m1p0;
7102         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7103         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7104         u32 grbm_int_cntl = 0;
7105         u32 dma_cntl, dma_cntl1;
7106
7107         if (!rdev->irq.installed) {
7108                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7109                 return -EINVAL;
7110         }
7111         /* don't enable anything if the ih is disabled */
7112         if (!rdev->ih.enabled) {
7113                 cik_disable_interrupts(rdev);
7114                 /* force the active interrupt state to all disabled */
7115                 cik_disable_interrupt_state(rdev);
7116                 return 0;
7117         }
7118
7119         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7120                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7121         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7122
7123         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7124         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7125         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7126         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7127         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7128         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7129
7130         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7131         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7132
7133         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7134
7135         /* enable CP interrupts on all rings */
7136         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7137                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7138                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7139         }
7140         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7141                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7142                 DRM_DEBUG("si_irq_set: sw int cp1\n");
7143                 if (ring->me == 1) {
7144                         switch (ring->pipe) {
7145                         case 0:
7146                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7147                                 break;
7148                         default:
7149                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7150                                 break;
7151                         }
7152                 } else {
7153                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7154                 }
7155         }
7156         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7157                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7158                 DRM_DEBUG("si_irq_set: sw int cp2\n");
7159                 if (ring->me == 1) {
7160                         switch (ring->pipe) {
7161                         case 0:
7162                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7163                                 break;
7164                         default:
7165                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7166                                 break;
7167                         }
7168                 } else {
7169                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7170                 }
7171         }
7172
7173         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7174                 DRM_DEBUG("cik_irq_set: sw int dma\n");
7175                 dma_cntl |= TRAP_ENABLE;
7176         }
7177
7178         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7179                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7180                 dma_cntl1 |= TRAP_ENABLE;
7181         }
7182
7183         if (rdev->irq.crtc_vblank_int[0] ||
7184             atomic_read(&rdev->irq.pflip[0])) {
7185                 DRM_DEBUG("cik_irq_set: vblank 0\n");
7186                 crtc1 |= VBLANK_INTERRUPT_MASK;
7187         }
7188         if (rdev->irq.crtc_vblank_int[1] ||
7189             atomic_read(&rdev->irq.pflip[1])) {
7190                 DRM_DEBUG("cik_irq_set: vblank 1\n");
7191                 crtc2 |= VBLANK_INTERRUPT_MASK;
7192         }
7193         if (rdev->irq.crtc_vblank_int[2] ||
7194             atomic_read(&rdev->irq.pflip[2])) {
7195                 DRM_DEBUG("cik_irq_set: vblank 2\n");
7196                 crtc3 |= VBLANK_INTERRUPT_MASK;
7197         }
7198         if (rdev->irq.crtc_vblank_int[3] ||
7199             atomic_read(&rdev->irq.pflip[3])) {
7200                 DRM_DEBUG("cik_irq_set: vblank 3\n");
7201                 crtc4 |= VBLANK_INTERRUPT_MASK;
7202         }
7203         if (rdev->irq.crtc_vblank_int[4] ||
7204             atomic_read(&rdev->irq.pflip[4])) {
7205                 DRM_DEBUG("cik_irq_set: vblank 4\n");
7206                 crtc5 |= VBLANK_INTERRUPT_MASK;
7207         }
7208         if (rdev->irq.crtc_vblank_int[5] ||
7209             atomic_read(&rdev->irq.pflip[5])) {
7210                 DRM_DEBUG("cik_irq_set: vblank 5\n");
7211                 crtc6 |= VBLANK_INTERRUPT_MASK;
7212         }
7213         if (rdev->irq.hpd[0]) {
7214                 DRM_DEBUG("cik_irq_set: hpd 1\n");
7215                 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7216         }
7217         if (rdev->irq.hpd[1]) {
7218                 DRM_DEBUG("cik_irq_set: hpd 2\n");
7219                 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7220         }
7221         if (rdev->irq.hpd[2]) {
7222                 DRM_DEBUG("cik_irq_set: hpd 3\n");
7223                 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7224         }
7225         if (rdev->irq.hpd[3]) {
7226                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7227                 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7228         }
7229         if (rdev->irq.hpd[4]) {
7230                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7231                 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7232         }
7233         if (rdev->irq.hpd[5]) {
7234                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7235                 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7236         }
7237
7238         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7239
7240         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7241         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7242
7243         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7244
7245         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7246
7247         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7248         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7249         if (rdev->num_crtc >= 4) {
7250                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7251                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7252         }
7253         if (rdev->num_crtc >= 6) {
7254                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7255                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7256         }
7257
7258         if (rdev->num_crtc >= 2) {
7259                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7260                        GRPH_PFLIP_INT_MASK);
7261                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7262                        GRPH_PFLIP_INT_MASK);
7263         }
7264         if (rdev->num_crtc >= 4) {
7265                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7266                        GRPH_PFLIP_INT_MASK);
7267                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7268                        GRPH_PFLIP_INT_MASK);
7269         }
7270         if (rdev->num_crtc >= 6) {
7271                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7272                        GRPH_PFLIP_INT_MASK);
7273                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7274                        GRPH_PFLIP_INT_MASK);
7275         }
7276
7277         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7278         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7279         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7280         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7281         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7282         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7283
7284         /* posting read */
7285         RREG32(SRBM_STATUS);
7286
7287         return 0;
7288 }
7289
7290 /**
7291  * cik_irq_ack - ack interrupt sources
7292  *
7293  * @rdev: radeon_device pointer
7294  *
7295  * Ack interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).  Certain interrupt sources are sw
7297  * generated and do not require an explicit ack.
7298  */
7299 static inline void cik_irq_ack(struct radeon_device *rdev)
7300 {
7301         u32 tmp;
7302
7303         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7304         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7305         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7306         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7307         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7308         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7309         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7310
7311         rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7312                 EVERGREEN_CRTC0_REGISTER_OFFSET);
7313         rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7314                 EVERGREEN_CRTC1_REGISTER_OFFSET);
7315         if (rdev->num_crtc >= 4) {
7316                 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7317                         EVERGREEN_CRTC2_REGISTER_OFFSET);
7318                 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7319                         EVERGREEN_CRTC3_REGISTER_OFFSET);
7320         }
7321         if (rdev->num_crtc >= 6) {
7322                 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7323                         EVERGREEN_CRTC4_REGISTER_OFFSET);
7324                 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7325                         EVERGREEN_CRTC5_REGISTER_OFFSET);
7326         }
7327
7328         if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7329                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7330                        GRPH_PFLIP_INT_CLEAR);
7331         if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7332                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7333                        GRPH_PFLIP_INT_CLEAR);
7334         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7335                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7336         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7337                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7338         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7339                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7340         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7341                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7342
7343         if (rdev->num_crtc >= 4) {
7344                 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7345                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7346                                GRPH_PFLIP_INT_CLEAR);
7347                 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7348                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7349                                GRPH_PFLIP_INT_CLEAR);
7350                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7351                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7352                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7353                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7354                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7355                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7356                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7357                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7358         }
7359
7360         if (rdev->num_crtc >= 6) {
7361                 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7362                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7363                                GRPH_PFLIP_INT_CLEAR);
7364                 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7365                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7366                                GRPH_PFLIP_INT_CLEAR);
7367                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7368                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7369                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7370                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7371                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7372                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7373                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7374                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7375         }
7376
7377         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7378                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7379                 tmp |= DC_HPDx_INT_ACK;
7380                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7381         }
7382         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7383                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7384                 tmp |= DC_HPDx_INT_ACK;
7385                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7386         }
7387         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7388                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7389                 tmp |= DC_HPDx_INT_ACK;
7390                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7391         }
7392         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7393                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7394                 tmp |= DC_HPDx_INT_ACK;
7395                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7396         }
7397         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7398                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7399                 tmp |= DC_HPDx_INT_ACK;
7400                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7401         }
7402         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7403                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7404                 tmp |= DC_HPDx_INT_ACK;
7405                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7406         }
7407         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7408                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7409                 tmp |= DC_HPDx_RX_INT_ACK;
7410                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7411         }
7412         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7413                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7414                 tmp |= DC_HPDx_RX_INT_ACK;
7415                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7416         }
7417         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7418                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7419                 tmp |= DC_HPDx_RX_INT_ACK;
7420                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7421         }
7422         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7423                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7424                 tmp |= DC_HPDx_RX_INT_ACK;
7425                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7426         }
7427         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7428                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7429                 tmp |= DC_HPDx_RX_INT_ACK;
7430                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7431         }
7432         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7433                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7434                 tmp |= DC_HPDx_RX_INT_ACK;
7435                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7436         }
7437 }
7438
7439 /**
7440  * cik_irq_disable - disable interrupts
7441  *
7442  * @rdev: radeon_device pointer
7443  *
7444  * Disable interrupts on the hw (CIK).
7445  */
7446 static void cik_irq_disable(struct radeon_device *rdev)
7447 {
7448         cik_disable_interrupts(rdev);
7449         /* Wait and acknowledge irq */
7450         mdelay(1);
7451         cik_irq_ack(rdev);
7452         cik_disable_interrupt_state(rdev);
7453 }
7454
7455 /**
7456  * cik_irq_disable - disable interrupts for suspend
7457  *
7458  * @rdev: radeon_device pointer
7459  *
7460  * Disable interrupts and stop the RLC (CIK).
7461  * Used for suspend.
7462  */
7463 static void cik_irq_suspend(struct radeon_device *rdev)
7464 {
7465         cik_irq_disable(rdev);
7466         cik_rlc_stop(rdev);
7467 }
7468
7469 /**
7470  * cik_irq_fini - tear down interrupt support
7471  *
7472  * @rdev: radeon_device pointer
7473  *
7474  * Disable interrupts on the hw and free the IH ring
7475  * buffer (CIK).
7476  * Used for driver unload.
7477  */
7478 static void cik_irq_fini(struct radeon_device *rdev)
7479 {
7480         cik_irq_suspend(rdev);
7481         r600_ih_ring_fini(rdev);
7482 }
7483
7484 /**
7485  * cik_get_ih_wptr - get the IH ring buffer wptr
7486  *
7487  * @rdev: radeon_device pointer
7488  *
7489  * Get the IH ring buffer wptr from either the register
7490  * or the writeback memory buffer (CIK).  Also check for
7491  * ring buffer overflow and deal with it.
7492  * Used by cik_irq_process().
7493  * Returns the value of the wptr.
7494  */
7495 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7496 {
7497         u32 wptr, tmp;
7498
7499         if (rdev->wb.enabled)
7500                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7501         else
7502                 wptr = RREG32(IH_RB_WPTR);
7503
7504         if (wptr & RB_OVERFLOW) {
7505                 wptr &= ~RB_OVERFLOW;
7506                 /* When a ring buffer overflow happen start parsing interrupt
7507                  * from the last not overwritten vector (wptr + 16). Hopefully
7508                  * this should allow us to catchup.
7509                  */
7510                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7511                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7512                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7513                 tmp = RREG32(IH_RB_CNTL);
7514                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7515                 WREG32(IH_RB_CNTL, tmp);
7516         }
7517         return (wptr & rdev->ih.ptr_mask);
7518 }
7519
7520 /*        CIK IV Ring
7521  * Each IV ring entry is 128 bits:
7522  * [7:0]    - interrupt source id
7523  * [31:8]   - reserved
7524  * [59:32]  - interrupt source data
7525  * [63:60]  - reserved
7526  * [71:64]  - RINGID
7527  *            CP:
7528  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7529  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7530  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7531  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7532  *            PIPE_ID - ME0 0=3D
7533  *                    - ME1&2 compute dispatcher (4 pipes each)
7534  *            SDMA:
7535  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7536  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7537  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7538  * [79:72]  - VMID
7539  * [95:80]  - PASID
7540  * [127:96] - reserved
7541  */
7542 /**
7543  * cik_irq_process - interrupt handler
7544  *
7545  * @rdev: radeon_device pointer
7546  *
 * Interrupt handler (CIK).  Walk the IH ring,
7548  * ack interrupts and schedule work to handle
7549  * interrupt events.
7550  * Returns irq process return code.
7551  */
7552 int cik_irq_process(struct radeon_device *rdev)
7553 {
7554         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7555         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7556         u32 wptr;
7557         u32 rptr;
7558         u32 src_id, src_data, ring_id;
7559         u8 me_id, pipe_id, queue_id;
7560         u32 ring_index;
7561         bool queue_hotplug = false;
7562         bool queue_dp = false;
7563         bool queue_reset = false;
7564         u32 addr, status, mc_client;
7565         bool queue_thermal = false;
7566
7567         if (!rdev->ih.enabled || rdev->shutdown)
7568                 return IRQ_NONE;
7569
7570         wptr = cik_get_ih_wptr(rdev);
7571
7572 restart_ih:
7573         /* is somebody else already processing irqs? */
7574         if (atomic_xchg(&rdev->ih.lock, 1))
7575                 return IRQ_NONE;
7576
7577         rptr = rdev->ih.rptr;
7578         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7579
7580         /* Order reading of wptr vs. reading of IH ring data */
7581         rmb();
7582
7583         /* display interrupts */
7584         cik_irq_ack(rdev);
7585
7586         while (rptr != wptr) {
7587                 /* wptr/rptr are in bytes! */
7588                 ring_index = rptr / 4;
7589
7590                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7591                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7592                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7593
7594                 switch (src_id) {
7595                 case 1: /* D1 vblank/vline */
7596                         switch (src_data) {
7597                         case 0: /* D1 vblank */
7598                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7599                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7600
7601                                 if (rdev->irq.crtc_vblank_int[0]) {
7602                                         drm_handle_vblank(rdev->ddev, 0);
7603                                         rdev->pm.vblank_sync = true;
7604                                         wake_up(&rdev->irq.vblank_queue);
7605                                 }
7606                                 if (atomic_read(&rdev->irq.pflip[0]))
7607                                         radeon_crtc_handle_vblank(rdev, 0);
7608                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7609                                 DRM_DEBUG("IH: D1 vblank\n");
7610
7611                                 break;
7612                         case 1: /* D1 vline */
7613                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7614                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7615
7616                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7617                                 DRM_DEBUG("IH: D1 vline\n");
7618
7619                                 break;
7620                         default:
7621                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7622                                 break;
7623                         }
7624                         break;
7625                 case 2: /* D2 vblank/vline */
7626                         switch (src_data) {
7627                         case 0: /* D2 vblank */
7628                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7629                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7630
7631                                 if (rdev->irq.crtc_vblank_int[1]) {
7632                                         drm_handle_vblank(rdev->ddev, 1);
7633                                         rdev->pm.vblank_sync = true;
7634                                         wake_up(&rdev->irq.vblank_queue);
7635                                 }
7636                                 if (atomic_read(&rdev->irq.pflip[1]))
7637                                         radeon_crtc_handle_vblank(rdev, 1);
7638                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7639                                 DRM_DEBUG("IH: D2 vblank\n");
7640
7641                                 break;
7642                         case 1: /* D2 vline */
7643                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7644                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7645
7646                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7647                                 DRM_DEBUG("IH: D2 vline\n");
7648
7649                                 break;
7650                         default:
7651                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7652                                 break;
7653                         }
7654                         break;
7655                 case 3: /* D3 vblank/vline */
7656                         switch (src_data) {
7657                         case 0: /* D3 vblank */
7658                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7659                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7660
7661                                 if (rdev->irq.crtc_vblank_int[2]) {
7662                                         drm_handle_vblank(rdev->ddev, 2);
7663                                         rdev->pm.vblank_sync = true;
7664                                         wake_up(&rdev->irq.vblank_queue);
7665                                 }
7666                                 if (atomic_read(&rdev->irq.pflip[2]))
7667                                         radeon_crtc_handle_vblank(rdev, 2);
7668                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7669                                 DRM_DEBUG("IH: D3 vblank\n");
7670
7671                                 break;
7672                         case 1: /* D3 vline */
7673                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7674                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7675
7676                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7677                                 DRM_DEBUG("IH: D3 vline\n");
7678
7679                                 break;
7680                         default:
7681                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7682                                 break;
7683                         }
7684                         break;
7685                 case 4: /* D4 vblank/vline */
7686                         switch (src_data) {
7687                         case 0: /* D4 vblank */
7688                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7689                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7690
7691                                 if (rdev->irq.crtc_vblank_int[3]) {
7692                                         drm_handle_vblank(rdev->ddev, 3);
7693                                         rdev->pm.vblank_sync = true;
7694                                         wake_up(&rdev->irq.vblank_queue);
7695                                 }
7696                                 if (atomic_read(&rdev->irq.pflip[3]))
7697                                         radeon_crtc_handle_vblank(rdev, 3);
7698                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7699                                 DRM_DEBUG("IH: D4 vblank\n");
7700
7701                                 break;
7702                         case 1: /* D4 vline */
7703                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7704                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7705
7706                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7707                                 DRM_DEBUG("IH: D4 vline\n");
7708
7709                                 break;
7710                         default:
7711                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7712                                 break;
7713                         }
7714                         break;
7715                 case 5: /* D5 vblank/vline */
7716                         switch (src_data) {
7717                         case 0: /* D5 vblank */
7718                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7719                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7720
7721                                 if (rdev->irq.crtc_vblank_int[4]) {
7722                                         drm_handle_vblank(rdev->ddev, 4);
7723                                         rdev->pm.vblank_sync = true;
7724                                         wake_up(&rdev->irq.vblank_queue);
7725                                 }
7726                                 if (atomic_read(&rdev->irq.pflip[4]))
7727                                         radeon_crtc_handle_vblank(rdev, 4);
7728                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7729                                 DRM_DEBUG("IH: D5 vblank\n");
7730
7731                                 break;
7732                         case 1: /* D5 vline */
7733                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7734                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7735
7736                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7737                                 DRM_DEBUG("IH: D5 vline\n");
7738
7739                                 break;
7740                         default:
7741                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7742                                 break;
7743                         }
7744                         break;
7745                 case 6: /* D6 vblank/vline */
7746                         switch (src_data) {
7747                         case 0: /* D6 vblank */
7748                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7749                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7750
7751                                 if (rdev->irq.crtc_vblank_int[5]) {
7752                                         drm_handle_vblank(rdev->ddev, 5);
7753                                         rdev->pm.vblank_sync = true;
7754                                         wake_up(&rdev->irq.vblank_queue);
7755                                 }
7756                                 if (atomic_read(&rdev->irq.pflip[5]))
7757                                         radeon_crtc_handle_vblank(rdev, 5);
7758                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7759                                 DRM_DEBUG("IH: D6 vblank\n");
7760
7761                                 break;
7762                         case 1: /* D6 vline */
7763                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7764                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7765
7766                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7767                                 DRM_DEBUG("IH: D6 vline\n");
7768
7769                                 break;
7770                         default:
7771                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7772                                 break;
7773                         }
7774                         break;
7775                 case 8: /* D1 page flip */
7776                 case 10: /* D2 page flip */
7777                 case 12: /* D3 page flip */
7778                 case 14: /* D4 page flip */
7779                 case 16: /* D5 page flip */
7780                 case 18: /* D6 page flip */
7781                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7782                         if (radeon_use_pflipirq > 0)
7783                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7784                         break;
7785                 case 42: /* HPD hotplug */
7786                         switch (src_data) {
7787                         case 0:
7788                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7789                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7790
7791                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7792                                 queue_hotplug = true;
7793                                 DRM_DEBUG("IH: HPD1\n");
7794
7795                                 break;
7796                         case 1:
7797                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7798                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7799
7800                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7801                                 queue_hotplug = true;
7802                                 DRM_DEBUG("IH: HPD2\n");
7803
7804                                 break;
7805                         case 2:
7806                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7807                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7808
7809                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7810                                 queue_hotplug = true;
7811                                 DRM_DEBUG("IH: HPD3\n");
7812
7813                                 break;
7814                         case 3:
7815                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7816                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7817
7818                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7819                                 queue_hotplug = true;
7820                                 DRM_DEBUG("IH: HPD4\n");
7821
7822                                 break;
7823                         case 4:
7824                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7825                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7826
7827                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7828                                 queue_hotplug = true;
7829                                 DRM_DEBUG("IH: HPD5\n");
7830
7831                                 break;
7832                         case 5:
7833                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7834                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7835
7836                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7837                                 queue_hotplug = true;
7838                                 DRM_DEBUG("IH: HPD6\n");
7839
7840                                 break;
7841                         case 6:
7842                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7843                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7844
7845                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7846                                 queue_dp = true;
7847                                 DRM_DEBUG("IH: HPD_RX 1\n");
7848
7849                                 break;
7850                         case 7:
7851                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7852                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7853
7854                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7855                                 queue_dp = true;
7856                                 DRM_DEBUG("IH: HPD_RX 2\n");
7857
7858                                 break;
7859                         case 8:
7860                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7861                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7862
7863                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7864                                 queue_dp = true;
7865                                 DRM_DEBUG("IH: HPD_RX 3\n");
7866
7867                                 break;
7868                         case 9:
7869                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7870                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7871
7872                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7873                                 queue_dp = true;
7874                                 DRM_DEBUG("IH: HPD_RX 4\n");
7875
7876                                 break;
7877                         case 10:
7878                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7879                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7880
7881                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7882                                 queue_dp = true;
7883                                 DRM_DEBUG("IH: HPD_RX 5\n");
7884
7885                                 break;
7886                         case 11:
7887                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7888                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7889
7890                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7891                                 queue_dp = true;
7892                                 DRM_DEBUG("IH: HPD_RX 6\n");
7893
7894                                 break;
7895                         default:
7896                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7897                                 break;
7898                         }
7899                         break;
7900                 case 96:
7901                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7902                         WREG32(SRBM_INT_ACK, 0x1);
7903                         break;
7904                 case 124: /* UVD */
7905                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7906                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7907                         break;
7908                 case 146:
7909                 case 147:
7910                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7911                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7912                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7913                         /* reset addr and status */
7914                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7915                         if (addr == 0x0 && status == 0x0)
7916                                 break;
7917                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7918                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7919                                 addr);
7920                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7921                                 status);
7922                         cik_vm_decode_fault(rdev, status, addr, mc_client);
7923                         break;
7924                 case 167: /* VCE */
7925                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7926                         switch (src_data) {
7927                         case 0:
7928                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7929                                 break;
7930                         case 1:
7931                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7932                                 break;
7933                         default:
7934                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7935                                 break;
7936                         }
7937                         break;
7938                 case 176: /* GFX RB CP_INT */
7939                 case 177: /* GFX IB CP_INT */
7940                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7941                         break;
7942                 case 181: /* CP EOP event */
7943                         DRM_DEBUG("IH: CP EOP\n");
7944                         /* XXX check the bitfield order! */
7945                         me_id = (ring_id & 0x60) >> 5;
7946                         pipe_id = (ring_id & 0x18) >> 3;
7947                         queue_id = (ring_id & 0x7) >> 0;
7948                         switch (me_id) {
7949                         case 0:
7950                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7951                                 break;
7952                         case 1:
7953                         case 2:
7954                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7955                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7956                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7957                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7958                                 break;
7959                         }
7960                         break;
7961                 case 184: /* CP Privileged reg access */
7962                         DRM_ERROR("Illegal register access in command stream\n");
7963                         /* XXX check the bitfield order! */
7964                         me_id = (ring_id & 0x60) >> 5;
7965                         pipe_id = (ring_id & 0x18) >> 3;
7966                         queue_id = (ring_id & 0x7) >> 0;
7967                         switch (me_id) {
7968                         case 0:
7969                                 /* This results in a full GPU reset, but all we need to do is soft
7970                                  * reset the CP for gfx
7971                                  */
7972                                 queue_reset = true;
7973                                 break;
7974                         case 1:
7975                                 /* XXX compute */
7976                                 queue_reset = true;
7977                                 break;
7978                         case 2:
7979                                 /* XXX compute */
7980                                 queue_reset = true;
7981                                 break;
7982                         }
7983                         break;
7984                 case 185: /* CP Privileged inst */
7985                         DRM_ERROR("Illegal instruction in command stream\n");
7986                         /* XXX check the bitfield order! */
7987                         me_id = (ring_id & 0x60) >> 5;
7988                         pipe_id = (ring_id & 0x18) >> 3;
7989                         queue_id = (ring_id & 0x7) >> 0;
7990                         switch (me_id) {
7991                         case 0:
7992                                 /* This results in a full GPU reset, but all we need to do is soft
7993                                  * reset the CP for gfx
7994                                  */
7995                                 queue_reset = true;
7996                                 break;
7997                         case 1:
7998                                 /* XXX compute */
7999                                 queue_reset = true;
8000                                 break;
8001                         case 2:
8002                                 /* XXX compute */
8003                                 queue_reset = true;
8004                                 break;
8005                         }
8006                         break;
8007                 case 224: /* SDMA trap event */
8008                         /* XXX check the bitfield order! */
8009                         me_id = (ring_id & 0x3) >> 0;
8010                         queue_id = (ring_id & 0xc) >> 2;
8011                         DRM_DEBUG("IH: SDMA trap\n");
8012                         switch (me_id) {
8013                         case 0:
8014                                 switch (queue_id) {
8015                                 case 0:
8016                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8017                                         break;
8018                                 case 1:
8019                                         /* XXX compute */
8020                                         break;
8021                                 case 2:
8022                                         /* XXX compute */
8023                                         break;
8024                                 }
8025                                 break;
8026                         case 1:
8027                                 switch (queue_id) {
8028                                 case 0:
8029                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8030                                         break;
8031                                 case 1:
8032                                         /* XXX compute */
8033                                         break;
8034                                 case 2:
8035                                         /* XXX compute */
8036                                         break;
8037                                 }
8038                                 break;
8039                         }
8040                         break;
8041                 case 230: /* thermal low to high */
8042                         DRM_DEBUG("IH: thermal low to high\n");
8043                         rdev->pm.dpm.thermal.high_to_low = false;
8044                         queue_thermal = true;
8045                         break;
8046                 case 231: /* thermal high to low */
8047                         DRM_DEBUG("IH: thermal high to low\n");
8048                         rdev->pm.dpm.thermal.high_to_low = true;
8049                         queue_thermal = true;
8050                         break;
8051                 case 233: /* GUI IDLE */
8052                         DRM_DEBUG("IH: GUI idle\n");
8053                         break;
8054                 case 241: /* SDMA Privileged inst */
8055                 case 247: /* SDMA Privileged inst */
8056                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
8057                         /* XXX check the bitfield order! */
8058                         me_id = (ring_id & 0x3) >> 0;
8059                         queue_id = (ring_id & 0xc) >> 2;
8060                         switch (me_id) {
8061                         case 0:
8062                                 switch (queue_id) {
8063                                 case 0:
8064                                         queue_reset = true;
8065                                         break;
8066                                 case 1:
8067                                         /* XXX compute */
8068                                         queue_reset = true;
8069                                         break;
8070                                 case 2:
8071                                         /* XXX compute */
8072                                         queue_reset = true;
8073                                         break;
8074                                 }
8075                                 break;
8076                         case 1:
8077                                 switch (queue_id) {
8078                                 case 0:
8079                                         queue_reset = true;
8080                                         break;
8081                                 case 1:
8082                                         /* XXX compute */
8083                                         queue_reset = true;
8084                                         break;
8085                                 case 2:
8086                                         /* XXX compute */
8087                                         queue_reset = true;
8088                                         break;
8089                                 }
8090                                 break;
8091                         }
8092                         break;
8093                 default:
8094                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8095                         break;
8096                 }
8097
8098                 /* wptr/rptr are in bytes! */
8099                 rptr += 16;
8100                 rptr &= rdev->ih.ptr_mask;
8101                 WREG32(IH_RB_RPTR, rptr);
8102         }
8103         if (queue_dp)
8104                 schedule_work(&rdev->dp_work);
8105         if (queue_hotplug)
8106                 schedule_delayed_work(&rdev->hotplug_work, 0);
8107         if (queue_reset) {
8108                 rdev->needs_reset = true;
8109                 wake_up_all(&rdev->fence_queue);
8110         }
8111         if (queue_thermal)
8112                 schedule_work(&rdev->pm.dpm.thermal.work);
8113         rdev->ih.rptr = rptr;
8114         atomic_set(&rdev->ih.lock, 0);
8115
8116         /* make sure wptr hasn't changed while processing */
8117         wptr = cik_get_ih_wptr(rdev);
8118         if (wptr != rptr)
8119                 goto restart_ih;
8120
8121         return IRQ_HANDLED;
8122 }
8123
8124 /*
8125  * startup/shutdown callbacks
8126  */
8127 static void cik_uvd_init(struct radeon_device *rdev)
8128 {
8129         int r;
8130
8131         if (!rdev->has_uvd)
8132                 return;
8133
8134         r = radeon_uvd_init(rdev);
8135         if (r) {
8136                 dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8137                 /*
8138                  * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8139                  * to early fails cik_uvd_start() and thus nothing happens
8140                  * there. So it is pointless to try to go through that code
8141                  * hence why we disable uvd here.
8142                  */
8143                 rdev->has_uvd = 0;
8144                 return;
8145         }
8146         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8147         r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8148 }
8149
8150 static void cik_uvd_start(struct radeon_device *rdev)
8151 {
8152         int r;
8153
8154         if (!rdev->has_uvd)
8155                 return;
8156
8157         r = radeon_uvd_resume(rdev);
8158         if (r) {
8159                 dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8160                 goto error;
8161         }
8162         r = uvd_v4_2_resume(rdev);
8163         if (r) {
8164                 dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8165                 goto error;
8166         }
8167         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8168         if (r) {
8169                 dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8170                 goto error;
8171         }
8172         return;
8173
8174 error:
8175         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8176 }
8177
8178 static void cik_uvd_resume(struct radeon_device *rdev)
8179 {
8180         struct radeon_ring *ring;
8181         int r;
8182
8183         if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8184                 return;
8185
8186         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8187         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8188         if (r) {
8189                 dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8190                 return;
8191         }
8192         r = uvd_v1_0_init(rdev);
8193         if (r) {
8194                 dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8195                 return;
8196         }
8197 }
8198
8199 static void cik_vce_init(struct radeon_device *rdev)
8200 {
8201         int r;
8202
8203         if (!rdev->has_vce)
8204                 return;
8205
8206         r = radeon_vce_init(rdev);
8207         if (r) {
8208                 dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8209                 /*
8210                  * At this point rdev->vce.vcpu_bo is NULL which trickles down
8211                  * to early fails cik_vce_start() and thus nothing happens
8212                  * there. So it is pointless to try to go through that code
8213                  * hence why we disable vce here.
8214                  */
8215                 rdev->has_vce = 0;
8216                 return;
8217         }
8218         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8219         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8220         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8221         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8222 }
8223
8224 static void cik_vce_start(struct radeon_device *rdev)
8225 {
8226         int r;
8227
8228         if (!rdev->has_vce)
8229                 return;
8230
8231         r = radeon_vce_resume(rdev);
8232         if (r) {
8233                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8234                 goto error;
8235         }
8236         r = vce_v2_0_resume(rdev);
8237         if (r) {
8238                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8239                 goto error;
8240         }
8241         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8242         if (r) {
8243                 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8244                 goto error;
8245         }
8246         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8247         if (r) {
8248                 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8249                 goto error;
8250         }
8251         return;
8252
8253 error:
8254         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8255         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8256 }
8257
8258 static void cik_vce_resume(struct radeon_device *rdev)
8259 {
8260         struct radeon_ring *ring;
8261         int r;
8262
8263         if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8264                 return;
8265
8266         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8267         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8268         if (r) {
8269                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8270                 return;
8271         }
8272         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8273         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8274         if (r) {
8275                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8276                 return;
8277         }
8278         r = vce_v1_0_init(rdev);
8279         if (r) {
8280                 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8281                 return;
8282         }
8283 }
8284
8285 /**
8286  * cik_startup - program the asic to a functional state
8287  *
8288  * @rdev: radeon_device pointer
8289  *
8290  * Programs the asic to a functional state (CIK).
8291  * Called by cik_init() and cik_resume().
8292  * Returns 0 for success, error for failure.
8293  */
8294 static int cik_startup(struct radeon_device *rdev)
8295 {
8296         struct radeon_ring *ring;
8297         u32 nop;
8298         int r;
8299
8300         /* enable pcie gen2/3 link */
8301         cik_pcie_gen3_enable(rdev);
8302         /* enable aspm */
8303         cik_program_aspm(rdev);
8304
8305         /* scratch needs to be initialized before MC */
8306         r = r600_vram_scratch_init(rdev);
8307         if (r)
8308                 return r;
8309
8310         cik_mc_program(rdev);
8311
8312         if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8313                 r = ci_mc_load_microcode(rdev);
8314                 if (r) {
8315                         DRM_ERROR("Failed to load MC firmware!\n");
8316                         return r;
8317                 }
8318         }
8319
8320         r = cik_pcie_gart_enable(rdev);
8321         if (r)
8322                 return r;
8323         cik_gpu_init(rdev);
8324
8325         /* allocate rlc buffers */
8326         if (rdev->flags & RADEON_IS_IGP) {
8327                 if (rdev->family == CHIP_KAVERI) {
8328                         rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8329                         rdev->rlc.reg_list_size =
8330                                 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8331                 } else {
8332                         rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8333                         rdev->rlc.reg_list_size =
8334                                 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8335                 }
8336         }
8337         rdev->rlc.cs_data = ci_cs_data;
8338         rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
8339         rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
8340         r = sumo_rlc_init(rdev);
8341         if (r) {
8342                 DRM_ERROR("Failed to init rlc BOs!\n");
8343                 return r;
8344         }
8345
8346         /* allocate wb buffer */
8347         r = radeon_wb_init(rdev);
8348         if (r)
8349                 return r;
8350
8351         /* allocate mec buffers */
8352         r = cik_mec_init(rdev);
8353         if (r) {
8354                 DRM_ERROR("Failed to init MEC BOs!\n");
8355                 return r;
8356         }
8357
8358         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8359         if (r) {
8360                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8361                 return r;
8362         }
8363
8364         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8365         if (r) {
8366                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8367                 return r;
8368         }
8369
8370         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8371         if (r) {
8372                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8373                 return r;
8374         }
8375
8376         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8377         if (r) {
8378                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8379                 return r;
8380         }
8381
8382         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8383         if (r) {
8384                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8385                 return r;
8386         }
8387
8388         cik_uvd_start(rdev);
8389         cik_vce_start(rdev);
8390
8391         /* Enable IRQ */
8392         if (!rdev->irq.installed) {
8393                 r = radeon_irq_kms_init(rdev);
8394                 if (r)
8395                         return r;
8396         }
8397
8398         r = cik_irq_init(rdev);
8399         if (r) {
8400                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
8401                 radeon_irq_kms_fini(rdev);
8402                 return r;
8403         }
8404         cik_irq_set(rdev);
8405
8406         if (rdev->family == CHIP_HAWAII) {
8407                 if (rdev->new_fw)
8408                         nop = PACKET3(PACKET3_NOP, 0x3FFF);
8409                 else
8410                         nop = RADEON_CP_PACKET2;
8411         } else {
8412                 nop = PACKET3(PACKET3_NOP, 0x3FFF);
8413         }
8414
8415         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8416         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8417                              nop);
8418         if (r)
8419                 return r;
8420
8421         /* set up the compute queues */
8422         /* type-2 packets are deprecated on MEC, use type-3 instead */
8423         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8424         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8425                              nop);
8426         if (r)
8427                 return r;
8428         ring->me = 1; /* first MEC */
8429         ring->pipe = 0; /* first pipe */
8430         ring->queue = 0; /* first queue */
8431         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8432
8433         /* type-2 packets are deprecated on MEC, use type-3 instead */
8434         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8435         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8436                              nop);
8437         if (r)
8438                 return r;
8439         /* dGPU only have 1 MEC */
8440         ring->me = 1; /* first MEC */
8441         ring->pipe = 0; /* first pipe */
8442         ring->queue = 1; /* second queue */
8443         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8444
8445         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8446         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8447                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8448         if (r)
8449                 return r;
8450
8451         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8452         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8453                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8454         if (r)
8455                 return r;
8456
8457         r = cik_cp_resume(rdev);
8458         if (r)
8459                 return r;
8460
8461         r = cik_sdma_resume(rdev);
8462         if (r)
8463                 return r;
8464
8465         cik_uvd_resume(rdev);
8466         cik_vce_resume(rdev);
8467
8468         r = radeon_ib_pool_init(rdev);
8469         if (r) {
8470                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8471                 return r;
8472         }
8473
8474         r = radeon_vm_manager_init(rdev);
8475         if (r) {
8476                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8477                 return r;
8478         }
8479
8480         r = radeon_audio_init(rdev);
8481         if (r)
8482                 return r;
8483
8484         return 0;
8485 }
8486
8487 /**
8488  * cik_resume - resume the asic to a functional state
8489  *
8490  * @rdev: radeon_device pointer
8491  *
8492  * Programs the asic to a functional state (CIK).
8493  * Called at resume.
8494  * Returns 0 for success, error for failure.
8495  */
8496 int cik_resume(struct radeon_device *rdev)
8497 {
8498         int r;
8499
8500         /* post card */
8501         atom_asic_init(rdev->mode_info.atom_context);
8502
8503         /* init golden registers */
8504         cik_init_golden_registers(rdev);
8505
8506         if (rdev->pm.pm_method == PM_METHOD_DPM)
8507                 radeon_pm_resume(rdev);
8508
8509         rdev->accel_working = true;
8510         r = cik_startup(rdev);
8511         if (r) {
8512                 DRM_ERROR("cik startup failed on resume\n");
8513                 rdev->accel_working = false;
8514                 return r;
8515         }
8516
8517         return r;
8518
8519 }
8520
8521 /**
8522  * cik_suspend - suspend the asic
8523  *
8524  * @rdev: radeon_device pointer
8525  *
8526  * Bring the chip into a state suitable for suspend (CIK).
8527  * Called at suspend.
8528  * Returns 0 for success.
8529  */
8530 int cik_suspend(struct radeon_device *rdev)
8531 {
8532         radeon_pm_suspend(rdev);
8533         radeon_audio_fini(rdev);
8534         radeon_vm_manager_fini(rdev);
8535         cik_cp_enable(rdev, false);
8536         cik_sdma_enable(rdev, false);
8537         if (rdev->has_uvd) {
8538                 uvd_v1_0_fini(rdev);
8539                 radeon_uvd_suspend(rdev);
8540         }
8541         if (rdev->has_vce)
8542                 radeon_vce_suspend(rdev);
8543         cik_fini_pg(rdev);
8544         cik_fini_cg(rdev);
8545         cik_irq_suspend(rdev);
8546         radeon_wb_disable(rdev);
8547         cik_pcie_gart_disable(rdev);
8548         return 0;
8549 }
8550
8551 /* Plan is to move initialization in that function and use
8552  * helper function so that radeon_device_init pretty much
8553  * do nothing more than calling asic specific function. This
8554  * should also allow to remove a bunch of callback function
8555  * like vram_info.
8556  */
8557 /**
8558  * cik_init - asic specific driver and hw init
8559  *
8560  * @rdev: radeon_device pointer
8561  *
8562  * Setup asic specific driver variables and program the hw
8563  * to a functional state (CIK).
8564  * Called at driver startup.
8565  * Returns 0 for success, errors for failure.
8566  */
8567 int cik_init(struct radeon_device *rdev)
8568 {
8569         struct radeon_ring *ring;
8570         int r;
8571
8572         /* Read BIOS */
8573         if (!radeon_get_bios(rdev)) {
8574                 if (ASIC_IS_AVIVO(rdev))
8575                         return -EINVAL;
8576         }
8577         /* Must be an ATOMBIOS */
8578         if (!rdev->is_atom_bios) {
8579                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8580                 return -EINVAL;
8581         }
8582         r = radeon_atombios_init(rdev);
8583         if (r)
8584                 return r;
8585
8586         /* Post card if necessary */
8587         if (!radeon_card_posted(rdev)) {
8588                 if (!rdev->bios) {
8589                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8590                         return -EINVAL;
8591                 }
8592                 DRM_INFO("GPU not posted. posting now...\n");
8593                 atom_asic_init(rdev->mode_info.atom_context);
8594         }
8595         /* init golden registers */
8596         cik_init_golden_registers(rdev);
8597         /* Initialize scratch registers */
8598         cik_scratch_init(rdev);
8599         /* Initialize surface registers */
8600         radeon_surface_init(rdev);
8601         /* Initialize clocks */
8602         radeon_get_clock_info(rdev->ddev);
8603
8604         /* Fence driver */
8605         r = radeon_fence_driver_init(rdev);
8606         if (r)
8607                 return r;
8608
8609         /* initialize memory controller */
8610         r = cik_mc_init(rdev);
8611         if (r)
8612                 return r;
8613         /* Memory manager */
8614         r = radeon_bo_init(rdev);
8615         if (r)
8616                 return r;
8617
8618         if (rdev->flags & RADEON_IS_IGP) {
8619                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8620                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8621                         r = cik_init_microcode(rdev);
8622                         if (r) {
8623                                 DRM_ERROR("Failed to load firmware!\n");
8624                                 return r;
8625                         }
8626                 }
8627         } else {
8628                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8629                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8630                     !rdev->mc_fw) {
8631                         r = cik_init_microcode(rdev);
8632                         if (r) {
8633                                 DRM_ERROR("Failed to load firmware!\n");
8634                                 return r;
8635                         }
8636                 }
8637         }
8638
8639         /* Initialize power management */
8640         radeon_pm_init(rdev);
8641
8642         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8643         ring->ring_obj = NULL;
8644         r600_ring_init(rdev, ring, 1024 * 1024);
8645
8646         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8647         ring->ring_obj = NULL;
8648         r600_ring_init(rdev, ring, 1024 * 1024);
8649         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8650         if (r)
8651                 return r;
8652
8653         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8654         ring->ring_obj = NULL;
8655         r600_ring_init(rdev, ring, 1024 * 1024);
8656         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8657         if (r)
8658                 return r;
8659
8660         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8661         ring->ring_obj = NULL;
8662         r600_ring_init(rdev, ring, 256 * 1024);
8663
8664         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8665         ring->ring_obj = NULL;
8666         r600_ring_init(rdev, ring, 256 * 1024);
8667
8668         cik_uvd_init(rdev);
8669         cik_vce_init(rdev);
8670
8671         rdev->ih.ring_obj = NULL;
8672         r600_ih_ring_init(rdev, 64 * 1024);
8673
8674         r = r600_pcie_gart_init(rdev);
8675         if (r)
8676                 return r;
8677
8678         rdev->accel_working = true;
8679         r = cik_startup(rdev);
8680         if (r) {
8681                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8682                 cik_cp_fini(rdev);
8683                 cik_sdma_fini(rdev);
8684                 cik_irq_fini(rdev);
8685                 sumo_rlc_fini(rdev);
8686                 cik_mec_fini(rdev);
8687                 radeon_wb_fini(rdev);
8688                 radeon_ib_pool_fini(rdev);
8689                 radeon_vm_manager_fini(rdev);
8690                 radeon_irq_kms_fini(rdev);
8691                 cik_pcie_gart_fini(rdev);
8692                 rdev->accel_working = false;
8693         }
8694
8695         /* Don't start up if the MC ucode is missing.
8696          * The default clocks and voltages before the MC ucode
8697          * is loaded are not suffient for advanced operations.
8698          */
8699         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8700                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8701                 return -EINVAL;
8702         }
8703
8704         return 0;
8705 }
8706
8707 /**
8708  * cik_fini - asic specific driver and hw fini
8709  *
8710  * @rdev: radeon_device pointer
8711  *
8712  * Tear down the asic specific driver variables and program the hw
8713  * to an idle state (CIK).
8714  * Called at driver unload.
8715  */
8716 void cik_fini(struct radeon_device *rdev)
8717 {
8718         radeon_pm_fini(rdev);
8719         cik_cp_fini(rdev);
8720         cik_sdma_fini(rdev);
8721         cik_fini_pg(rdev);
8722         cik_fini_cg(rdev);
8723         cik_irq_fini(rdev);
8724         sumo_rlc_fini(rdev);
8725         cik_mec_fini(rdev);
8726         radeon_wb_fini(rdev);
8727         radeon_vm_manager_fini(rdev);
8728         radeon_ib_pool_fini(rdev);
8729         radeon_irq_kms_fini(rdev);
8730         uvd_v1_0_fini(rdev);
8731         radeon_uvd_fini(rdev);
8732         radeon_vce_fini(rdev);
8733         cik_pcie_gart_fini(rdev);
8734         r600_vram_scratch_fini(rdev);
8735         radeon_gem_fini(rdev);
8736         radeon_fence_driver_fini(rdev);
8737         radeon_bo_fini(rdev);
8738         radeon_atombios_fini(rdev);
8739         kfree(rdev->bios);
8740         rdev->bios = NULL;
8741 }
8742
8743 void dce8_program_fmt(struct drm_encoder *encoder)
8744 {
8745         struct drm_device *dev = encoder->dev;
8746         struct radeon_device *rdev = dev->dev_private;
8747         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8748         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8749         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8750         int bpc = 0;
8751         u32 tmp = 0;
8752         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8753
8754         if (connector) {
8755                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8756                 bpc = radeon_get_monitor_bpc(connector);
8757                 dither = radeon_connector->dither;
8758         }
8759
8760         /* LVDS/eDP FMT is set up by atom */
8761         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8762                 return;
8763
8764         /* not needed for analog */
8765         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8766             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8767                 return;
8768
8769         if (bpc == 0)
8770                 return;
8771
8772         switch (bpc) {
8773         case 6:
8774                 if (dither == RADEON_FMT_DITHER_ENABLE)
8775                         /* XXX sort out optimal dither settings */
8776                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8777                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8778                 else
8779                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8780                 break;
8781         case 8:
8782                 if (dither == RADEON_FMT_DITHER_ENABLE)
8783                         /* XXX sort out optimal dither settings */
8784                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8785                                 FMT_RGB_RANDOM_ENABLE |
8786                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8787                 else
8788                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8789                 break;
8790         case 10:
8791                 if (dither == RADEON_FMT_DITHER_ENABLE)
8792                         /* XXX sort out optimal dither settings */
8793                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8794                                 FMT_RGB_RANDOM_ENABLE |
8795                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8796                 else
8797                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8798                 break;
8799         default:
8800                 /* not needed */
8801                 break;
8802         }
8803
8804         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8805 }
8806
8807 /* display watermark setup */
8808 /**
8809  * dce8_line_buffer_adjust - Set up the line buffer
8810  *
8811  * @rdev: radeon_device pointer
8812  * @radeon_crtc: the selected display controller
8813  * @mode: the current display mode on the selected display
8814  * controller
8815  *
8816  * Setup up the line buffer allocation for
8817  * the selected display controller (CIK).
8818  * Returns the line buffer size in pixels.
8819  */
8820 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8821                                    struct radeon_crtc *radeon_crtc,
8822                                    struct drm_display_mode *mode)
8823 {
8824         u32 tmp, buffer_alloc, i;
8825         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8826         /*
8827          * Line Buffer Setup
8828          * There are 6 line buffers, one for each display controllers.
8829          * There are 3 partitions per LB. Select the number of partitions
8830          * to enable based on the display width.  For display widths larger
8831          * than 4096, you need use to use 2 display controllers and combine
8832          * them using the stereo blender.
8833          */
8834         if (radeon_crtc->base.enabled && mode) {
8835                 if (mode->crtc_hdisplay < 1920) {
8836                         tmp = 1;
8837                         buffer_alloc = 2;
8838                 } else if (mode->crtc_hdisplay < 2560) {
8839                         tmp = 2;
8840                         buffer_alloc = 2;
8841                 } else if (mode->crtc_hdisplay < 4096) {
8842                         tmp = 0;
8843                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8844                 } else {
8845                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8846                         tmp = 0;
8847                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8848                 }
8849         } else {
8850                 tmp = 1;
8851                 buffer_alloc = 0;
8852         }
8853
8854         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8855                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8856
8857         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8858                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8859         for (i = 0; i < rdev->usec_timeout; i++) {
8860                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8861                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8862                         break;
8863                 udelay(1);
8864         }
8865
8866         if (radeon_crtc->base.enabled && mode) {
8867                 switch (tmp) {
8868                 case 0:
8869                 default:
8870                         return 4096 * 2;
8871                 case 1:
8872                         return 1920 * 2;
8873                 case 2:
8874                         return 2560 * 2;
8875                 }
8876         }
8877
8878         /* controller not enabled, so no lb used */
8879         return 0;
8880 }
8881
8882 /**
8883  * cik_get_number_of_dram_channels - get the number of dram channels
8884  *
8885  * @rdev: radeon_device pointer
8886  *
8887  * Look up the number of video ram channels (CIK).
8888  * Used for display watermark bandwidth calculations
8889  * Returns the number of dram channels
8890  */
8891 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8892 {
8893         u32 tmp = RREG32(MC_SHARED_CHMAP);
8894
8895         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8896         case 0:
8897         default:
8898                 return 1;
8899         case 1:
8900                 return 2;
8901         case 2:
8902                 return 4;
8903         case 3:
8904                 return 8;
8905         case 4:
8906                 return 3;
8907         case 5:
8908                 return 6;
8909         case 6:
8910                 return 10;
8911         case 7:
8912                 return 12;
8913         case 8:
8914                 return 16;
8915         }
8916 }
8917
/* Per-head inputs for the DCE8 display watermark calculations below. */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8933
8934 /**
8935  * dce8_dram_bandwidth - get the dram bandwidth
8936  *
8937  * @wm: watermark calculation data
8938  *
8939  * Calculate the raw dram bandwidth (CIK).
8940  * Used for display watermark bandwidth calculations
8941  * Returns the dram bandwidth in MBytes/s
8942  */
8943 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8944 {
8945         /* Calculate raw DRAM Bandwidth */
8946         fixed20_12 dram_efficiency; /* 0.7 */
8947         fixed20_12 yclk, dram_channels, bandwidth;
8948         fixed20_12 a;
8949
8950         a.full = dfixed_const(1000);
8951         yclk.full = dfixed_const(wm->yclk);
8952         yclk.full = dfixed_div(yclk, a);
8953         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8954         a.full = dfixed_const(10);
8955         dram_efficiency.full = dfixed_const(7);
8956         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8957         bandwidth.full = dfixed_mul(dram_channels, yclk);
8958         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8959
8960         return dfixed_trunc(bandwidth);
8961 }
8962
8963 /**
8964  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8965  *
8966  * @wm: watermark calculation data
8967  *
8968  * Calculate the dram bandwidth used for display (CIK).
8969  * Used for display watermark bandwidth calculations
8970  * Returns the dram bandwidth for display in MBytes/s
8971  */
8972 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8973 {
8974         /* Calculate DRAM Bandwidth and the part allocated to display. */
8975         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8976         fixed20_12 yclk, dram_channels, bandwidth;
8977         fixed20_12 a;
8978
8979         a.full = dfixed_const(1000);
8980         yclk.full = dfixed_const(wm->yclk);
8981         yclk.full = dfixed_div(yclk, a);
8982         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8983         a.full = dfixed_const(10);
8984         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8985         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8986         bandwidth.full = dfixed_mul(dram_channels, yclk);
8987         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8988
8989         return dfixed_trunc(bandwidth);
8990 }
8991
8992 /**
8993  * dce8_data_return_bandwidth - get the data return bandwidth
8994  *
8995  * @wm: watermark calculation data
8996  *
8997  * Calculate the data return bandwidth used for display (CIK).
8998  * Used for display watermark bandwidth calculations
8999  * Returns the data return bandwidth in MBytes/s
9000  */
9001 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9002 {
9003         /* Calculate the display Data return Bandwidth */
9004         fixed20_12 return_efficiency; /* 0.8 */
9005         fixed20_12 sclk, bandwidth;
9006         fixed20_12 a;
9007
9008         a.full = dfixed_const(1000);
9009         sclk.full = dfixed_const(wm->sclk);
9010         sclk.full = dfixed_div(sclk, a);
9011         a.full = dfixed_const(10);
9012         return_efficiency.full = dfixed_const(8);
9013         return_efficiency.full = dfixed_div(return_efficiency, a);
9014         a.full = dfixed_const(32);
9015         bandwidth.full = dfixed_mul(a, sclk);
9016         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9017
9018         return dfixed_trunc(bandwidth);
9019 }
9020
9021 /**
9022  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9023  *
9024  * @wm: watermark calculation data
9025  *
9026  * Calculate the dmif bandwidth used for display (CIK).
9027  * Used for display watermark bandwidth calculations
9028  * Returns the dmif bandwidth in MBytes/s
9029  */
9030 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9031 {
9032         /* Calculate the DMIF Request Bandwidth */
9033         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9034         fixed20_12 disp_clk, bandwidth;
9035         fixed20_12 a, b;
9036
9037         a.full = dfixed_const(1000);
9038         disp_clk.full = dfixed_const(wm->disp_clk);
9039         disp_clk.full = dfixed_div(disp_clk, a);
9040         a.full = dfixed_const(32);
9041         b.full = dfixed_mul(a, disp_clk);
9042
9043         a.full = dfixed_const(10);
9044         disp_clk_request_efficiency.full = dfixed_const(8);
9045         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9046
9047         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9048
9049         return dfixed_trunc(bandwidth);
9050 }
9051
9052 /**
9053  * dce8_available_bandwidth - get the min available bandwidth
9054  *
9055  * @wm: watermark calculation data
9056  *
9057  * Calculate the min available bandwidth used for display (CIK).
9058  * Used for display watermark bandwidth calculations
9059  * Returns the min available bandwidth in MBytes/s
9060  */
9061 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9062 {
9063         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9064         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9065         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9066         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9067
9068         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9069 }
9070
9071 /**
9072  * dce8_average_bandwidth - get the average available bandwidth
9073  *
9074  * @wm: watermark calculation data
9075  *
9076  * Calculate the average available bandwidth used for display (CIK).
9077  * Used for display watermark bandwidth calculations
9078  * Returns the average available bandwidth in MBytes/s
9079  */
9080 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9081 {
9082         /* Calculate the display mode Average Bandwidth
9083          * DisplayMode should contain the source and destination dimensions,
9084          * timing, etc.
9085          */
9086         fixed20_12 bpp;
9087         fixed20_12 line_time;
9088         fixed20_12 src_width;
9089         fixed20_12 bandwidth;
9090         fixed20_12 a;
9091
9092         a.full = dfixed_const(1000);
9093         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9094         line_time.full = dfixed_div(line_time, a);
9095         bpp.full = dfixed_const(wm->bytes_per_pixel);
9096         src_width.full = dfixed_const(wm->src_width);
9097         bandwidth.full = dfixed_mul(src_width, bpp);
9098         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9099         bandwidth.full = dfixed_div(bandwidth, line_time);
9100
9101         return dfixed_trunc(bandwidth);
9102 }
9103
9104 /**
9105  * dce8_latency_watermark - get the latency watermark
9106  *
9107  * @wm: watermark calculation data
9108  *
9109  * Calculate the latency watermark (CIK).
9110  * Used for display watermark bandwidth calculations
9111  * Returns the latency watermark in ns
9112  */
9113 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9114 {
9115         /* First calculate the latency in ns */
9116         u32 mc_latency = 2000; /* 2000 ns. */
9117         u32 available_bandwidth = dce8_available_bandwidth(wm);
9118         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9119         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9120         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9121         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9122                 (wm->num_heads * cursor_line_pair_return_time);
9123         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9124         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9125         u32 tmp, dmif_size = 12288;
9126         fixed20_12 a, b, c;
9127
9128         if (wm->num_heads == 0)
9129                 return 0;
9130
9131         a.full = dfixed_const(2);
9132         b.full = dfixed_const(1);
9133         if ((wm->vsc.full > a.full) ||
9134             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9135             (wm->vtaps >= 5) ||
9136             ((wm->vsc.full >= a.full) && wm->interlaced))
9137                 max_src_lines_per_dst_line = 4;
9138         else
9139                 max_src_lines_per_dst_line = 2;
9140
9141         a.full = dfixed_const(available_bandwidth);
9142         b.full = dfixed_const(wm->num_heads);
9143         a.full = dfixed_div(a, b);
9144         tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
9145         tmp = min(dfixed_trunc(a), tmp);
9146
9147         lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
9148
9149         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9150         b.full = dfixed_const(1000);
9151         c.full = dfixed_const(lb_fill_bw);
9152         b.full = dfixed_div(c, b);
9153         a.full = dfixed_div(a, b);
9154         line_fill_time = dfixed_trunc(a);
9155
9156         if (line_fill_time < wm->active_time)
9157                 return latency;
9158         else
9159                 return latency + (line_fill_time - wm->active_time);
9160
9161 }
9162
9163 /**
9164  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9165  * average and available dram bandwidth
9166  *
9167  * @wm: watermark calculation data
9168  *
9169  * Check if the display average bandwidth fits in the display
9170  * dram bandwidth (CIK).
9171  * Used for display watermark bandwidth calculations
9172  * Returns true if the display fits, false if not.
9173  */
9174 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9175 {
9176         if (dce8_average_bandwidth(wm) <=
9177             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9178                 return true;
9179         else
9180                 return false;
9181 }
9182
9183 /**
9184  * dce8_average_bandwidth_vs_available_bandwidth - check
9185  * average and available bandwidth
9186  *
9187  * @wm: watermark calculation data
9188  *
9189  * Check if the display average bandwidth fits in the display
9190  * available bandwidth (CIK).
9191  * Used for display watermark bandwidth calculations
9192  * Returns true if the display fits, false if not.
9193  */
9194 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9195 {
9196         if (dce8_average_bandwidth(wm) <=
9197             (dce8_available_bandwidth(wm) / wm->num_heads))
9198                 return true;
9199         else
9200                 return false;
9201 }
9202
9203 /**
9204  * dce8_check_latency_hiding - check latency hiding
9205  *
9206  * @wm: watermark calculation data
9207  *
9208  * Check latency hiding (CIK).
9209  * Used for display watermark bandwidth calculations
9210  * Returns true if the display fits, false if not.
9211  */
9212 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9213 {
9214         u32 lb_partitions = wm->lb_size / wm->src_width;
9215         u32 line_time = wm->active_time + wm->blank_time;
9216         u32 latency_tolerant_lines;
9217         u32 latency_hiding;
9218         fixed20_12 a;
9219
9220         a.full = dfixed_const(1);
9221         if (wm->vsc.full > a.full)
9222                 latency_tolerant_lines = 1;
9223         else {
9224                 if (lb_partitions <= (wm->vtaps + 1))
9225                         latency_tolerant_lines = 1;
9226                 else
9227                         latency_tolerant_lines = 2;
9228         }
9229
9230         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9231
9232         if (dce8_latency_watermark(wm) <= latency_hiding)
9233                 return true;
9234         else
9235                 return false;
9236 }
9237
9238 /**
9239  * dce8_program_watermarks - program display watermarks
9240  *
9241  * @rdev: radeon_device pointer
9242  * @radeon_crtc: the selected display controller
9243  * @lb_size: line buffer size
9244  * @num_heads: number of display controllers in use
9245  *
9246  * Calculate and program the display watermarks for the
9247  * selected display controller (CIK).
9248  */
9249 static void dce8_program_watermarks(struct radeon_device *rdev,
9250                                     struct radeon_crtc *radeon_crtc,
9251                                     u32 lb_size, u32 num_heads)
9252 {
9253         struct drm_display_mode *mode = &radeon_crtc->base.mode;
9254         struct dce8_wm_params wm_low, wm_high;
9255         u32 active_time;
9256         u32 line_time = 0;
9257         u32 latency_watermark_a = 0, latency_watermark_b = 0;
9258         u32 tmp, wm_mask;
9259
9260         if (radeon_crtc->base.enabled && num_heads && mode) {
9261                 active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
9262                                             (u32)mode->clock);
9263                 line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
9264                                           (u32)mode->clock);
9265                 line_time = min(line_time, (u32)65535);
9266
9267                 /* watermark for high clocks */
9268                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9269                     rdev->pm.dpm_enabled) {
9270                         wm_high.yclk =
9271                                 radeon_dpm_get_mclk(rdev, false) * 10;
9272                         wm_high.sclk =
9273                                 radeon_dpm_get_sclk(rdev, false) * 10;
9274                 } else {
9275                         wm_high.yclk = rdev->pm.current_mclk * 10;
9276                         wm_high.sclk = rdev->pm.current_sclk * 10;
9277                 }
9278
9279                 wm_high.disp_clk = mode->clock;
9280                 wm_high.src_width = mode->crtc_hdisplay;
9281                 wm_high.active_time = active_time;
9282                 wm_high.blank_time = line_time - wm_high.active_time;
9283                 wm_high.interlaced = false;
9284                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9285                         wm_high.interlaced = true;
9286                 wm_high.vsc = radeon_crtc->vsc;
9287                 wm_high.vtaps = 1;
9288                 if (radeon_crtc->rmx_type != RMX_OFF)
9289                         wm_high.vtaps = 2;
9290                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9291                 wm_high.lb_size = lb_size;
9292                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9293                 wm_high.num_heads = num_heads;
9294
9295                 /* set for high clocks */
9296                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9297
9298                 /* possibly force display priority to high */
9299                 /* should really do this at mode validation time... */
9300                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9301                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9302                     !dce8_check_latency_hiding(&wm_high) ||
9303                     (rdev->disp_priority == 2)) {
9304                         DRM_DEBUG_KMS("force priority to high\n");
9305                 }
9306
9307                 /* watermark for low clocks */
9308                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9309                     rdev->pm.dpm_enabled) {
9310                         wm_low.yclk =
9311                                 radeon_dpm_get_mclk(rdev, true) * 10;
9312                         wm_low.sclk =
9313                                 radeon_dpm_get_sclk(rdev, true) * 10;
9314                 } else {
9315                         wm_low.yclk = rdev->pm.current_mclk * 10;
9316                         wm_low.sclk = rdev->pm.current_sclk * 10;
9317                 }
9318
9319                 wm_low.disp_clk = mode->clock;
9320                 wm_low.src_width = mode->crtc_hdisplay;
9321                 wm_low.active_time = active_time;
9322                 wm_low.blank_time = line_time - wm_low.active_time;
9323                 wm_low.interlaced = false;
9324                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9325                         wm_low.interlaced = true;
9326                 wm_low.vsc = radeon_crtc->vsc;
9327                 wm_low.vtaps = 1;
9328                 if (radeon_crtc->rmx_type != RMX_OFF)
9329                         wm_low.vtaps = 2;
9330                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9331                 wm_low.lb_size = lb_size;
9332                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9333                 wm_low.num_heads = num_heads;
9334
9335                 /* set for low clocks */
9336                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9337
9338                 /* possibly force display priority to high */
9339                 /* should really do this at mode validation time... */
9340                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9341                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9342                     !dce8_check_latency_hiding(&wm_low) ||
9343                     (rdev->disp_priority == 2)) {
9344                         DRM_DEBUG_KMS("force priority to high\n");
9345                 }
9346
9347                 /* Save number of lines the linebuffer leads before the scanout */
9348                 radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9349         }
9350
9351         /* select wm A */
9352         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9353         tmp = wm_mask;
9354         tmp &= ~LATENCY_WATERMARK_MASK(3);
9355         tmp |= LATENCY_WATERMARK_MASK(1);
9356         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9357         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9358                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9359                 LATENCY_HIGH_WATERMARK(line_time)));
9360         /* select wm B */
9361         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9362         tmp &= ~LATENCY_WATERMARK_MASK(3);
9363         tmp |= LATENCY_WATERMARK_MASK(2);
9364         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9365         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9366                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9367                 LATENCY_HIGH_WATERMARK(line_time)));
9368         /* restore original selection */
9369         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9370
9371         /* save values for DPM */
9372         radeon_crtc->line_time = line_time;
9373         radeon_crtc->wm_high = latency_watermark_a;
9374         radeon_crtc->wm_low = latency_watermark_b;
9375 }
9376
9377 /**
9378  * dce8_bandwidth_update - program display watermarks
9379  *
9380  * @rdev: radeon_device pointer
9381  *
9382  * Calculate and program the display watermarks and line
9383  * buffer allocation (CIK).
9384  */
9385 void dce8_bandwidth_update(struct radeon_device *rdev)
9386 {
9387         struct drm_display_mode *mode = NULL;
9388         u32 num_heads = 0, lb_size;
9389         int i;
9390
9391         if (!rdev->mode_info.mode_config_initialized)
9392                 return;
9393
9394         radeon_update_display_priority(rdev);
9395
9396         for (i = 0; i < rdev->num_crtc; i++) {
9397                 if (rdev->mode_info.crtcs[i]->base.enabled)
9398                         num_heads++;
9399         }
9400         for (i = 0; i < rdev->num_crtc; i++) {
9401                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9402                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9403                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9404         }
9405 }
9406
9407 /**
9408  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9409  *
9410  * @rdev: radeon_device pointer
9411  *
9412  * Fetches a GPU clock counter snapshot (SI).
9413  * Returns the 64 bit clock counter snapshot.
9414  */
9415 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9416 {
9417         uint64_t clock;
9418
9419         mutex_lock(&rdev->gpu_clock_mutex);
9420         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9421         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9422                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9423         mutex_unlock(&rdev->gpu_clock_mutex);
9424         return clock;
9425 }
9426
9427 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9428                              u32 cntl_reg, u32 status_reg)
9429 {
9430         int r, i;
9431         struct atom_clock_dividers dividers;
9432         uint32_t tmp;
9433
9434         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9435                                            clock, false, &dividers);
9436         if (r)
9437                 return r;
9438
9439         tmp = RREG32_SMC(cntl_reg);
9440         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9441         tmp |= dividers.post_divider;
9442         WREG32_SMC(cntl_reg, tmp);
9443
9444         for (i = 0; i < 100; i++) {
9445                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9446                         break;
9447                 mdelay(10);
9448         }
9449         if (i == 100)
9450                 return -ETIMEDOUT;
9451
9452         return 0;
9453 }
9454
9455 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9456 {
9457         int r = 0;
9458
9459         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9460         if (r)
9461                 return r;
9462
9463         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9464         return r;
9465 }
9466
9467 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9468 {
9469         int r, i;
9470         struct atom_clock_dividers dividers;
9471         u32 tmp;
9472
9473         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9474                                            ecclk, false, &dividers);
9475         if (r)
9476                 return r;
9477
9478         for (i = 0; i < 100; i++) {
9479                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9480                         break;
9481                 mdelay(10);
9482         }
9483         if (i == 100)
9484                 return -ETIMEDOUT;
9485
9486         tmp = RREG32_SMC(CG_ECLK_CNTL);
9487         tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9488         tmp |= dividers.post_divider;
9489         WREG32_SMC(CG_ECLK_CNTL, tmp);
9490
9491         for (i = 0; i < 100; i++) {
9492                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9493                         break;
9494                 mdelay(10);
9495         }
9496         if (i == 100)
9497                 return -ETIMEDOUT;
9498
9499         return 0;
9500 }
9501
9502 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9503 {
9504         struct pci_dev *root = rdev->pdev->bus->self;
9505         int bridge_pos, gpu_pos;
9506         u32 speed_cntl, mask, current_data_rate;
9507         int ret, i;
9508         u16 tmp16;
9509
9510         if (pci_is_root_bus(rdev->pdev->bus))
9511                 return;
9512
9513         if (radeon_pcie_gen2 == 0)
9514                 return;
9515
9516         if (rdev->flags & RADEON_IS_IGP)
9517                 return;
9518
9519         if (!(rdev->flags & RADEON_IS_PCIE))
9520                 return;
9521
9522         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9523         if (ret != 0)
9524                 return;
9525
9526         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9527                 return;
9528
9529         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9530         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9531                 LC_CURRENT_DATA_RATE_SHIFT;
9532         if (mask & DRM_PCIE_SPEED_80) {
9533                 if (current_data_rate == 2) {
9534                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9535                         return;
9536                 }
9537                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9538         } else if (mask & DRM_PCIE_SPEED_50) {
9539                 if (current_data_rate == 1) {
9540                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9541                         return;
9542                 }
9543                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9544         }
9545
9546         bridge_pos = pci_pcie_cap(root);
9547         if (!bridge_pos)
9548                 return;
9549
9550         gpu_pos = pci_pcie_cap(rdev->pdev);
9551         if (!gpu_pos)
9552                 return;
9553
9554         if (mask & DRM_PCIE_SPEED_80) {
9555                 /* re-try equalization if gen3 is not already enabled */
9556                 if (current_data_rate != 2) {
9557                         u16 bridge_cfg, gpu_cfg;
9558                         u16 bridge_cfg2, gpu_cfg2;
9559                         u32 max_lw, current_lw, tmp;
9560
9561                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9562                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9563
9564                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9565                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9566
9567                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9568                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9569
9570                         tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9571                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9572                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9573
9574                         if (current_lw < max_lw) {
9575                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9576                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9577                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9578                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9579                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9580                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9581                                 }
9582                         }
9583
9584                         for (i = 0; i < 10; i++) {
9585                                 /* check status */
9586                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9587                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9588                                         break;
9589
9590                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9591                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9592
9593                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9594                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9595
9596                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9597                                 tmp |= LC_SET_QUIESCE;
9598                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9599
9600                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9601                                 tmp |= LC_REDO_EQ;
9602                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9603
9604                                 mdelay(100);
9605
9606                                 /* linkctl */
9607                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9608                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9609                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9610                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9611
9612                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9613                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9614                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9615                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9616
9617                                 /* linkctl2 */
9618                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9619                                 tmp16 &= ~((1 << 4) | (7 << 9));
9620                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9621                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9622
9623                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9624                                 tmp16 &= ~((1 << 4) | (7 << 9));
9625                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9626                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9627
9628                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9629                                 tmp &= ~LC_SET_QUIESCE;
9630                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9631                         }
9632                 }
9633         }
9634
9635         /* set the link speed */
9636         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9637         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9638         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9639
9640         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9641         tmp16 &= ~0xf;
9642         if (mask & DRM_PCIE_SPEED_80)
9643                 tmp16 |= 3; /* gen3 */
9644         else if (mask & DRM_PCIE_SPEED_50)
9645                 tmp16 |= 2; /* gen2 */
9646         else
9647                 tmp16 |= 1; /* gen1 */
9648         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9649
9650         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9651         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9652         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9653
9654         for (i = 0; i < rdev->usec_timeout; i++) {
9655                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9656                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9657                         break;
9658                 udelay(1);
9659         }
9660 }
9661
/*
 * cik_program_aspm - program PCIE ASPM (Active State Power Management)
 * related registers for CIK parts.
 *
 * Bails out early when ASPM is disabled via the radeon.aspm module
 * parameter, on IGP parts (untested there, see XXX below), or when the
 * device is not on a PCIE bus.  The disable_* locals below act as
 * policy knobs; all are currently hard-wired to false, i.e. L0s, L1,
 * PLL power-down in L1 and CLKREQ# handling are all left enabled.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* policy knobs — everything enabled with the current settings */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* radeon.aspm=0 disables all ASPM programming */
	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/*
	 * Override the transmitted N_FTS value (number of fast training
	 * sequences advertised for L0s exit — TODO confirm against the
	 * register spec) to 0x24.  Each block below follows the same
	 * read-modify-write-if-changed pattern to avoid redundant writes.
	 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	/* ignore end-of-data-block errors (per the bit name) */
	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/*
	 * Build the L0s/L1 inactivity configuration.  Start from cleared
	 * inactivity timers with PMI-to-L1 transitions disabled, then
	 * enable the timers according to the policy knobs above.
	 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		/* L1 enabled: set its inactivity timer and re-allow PMI-to-L1 */
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/*
			 * Force the PLL power state in OFF/TXS2 to 7 on both
			 * PIF blocks (PB0/PB1), pads 0 and 1.  Exact power
			 * state encoding is hardware-defined; values mirror
			 * the mask/set macros from cikd.h.
			 */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			/* dynamic lane power state 3 — encoding is hw-defined */
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/*
			 * CLKREQ# is only usable if we have an upstream
			 * bridge (i.e. the GPU is not on a root bus) and
			 * that bridge advertises clock power management in
			 * its link capabilities.
			 */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				/* allow power-down in L1/L2,L3 when CLKREQ# works */
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/*
				 * Re-route several internal clocks (thermal
				 * monitor, deep sleep, zclk, MPLL bypass) to
				 * alternate sources so they keep running when
				 * the reference clock is gated — presumably;
				 * exact mux meanings are SMC-defined.
				 */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/*
		 * L1 disabled: just commit the L0s/PMI settings built above.
		 * Dead with the current hard-coded disable_l1 = false.
		 */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable memory light sleep in the BIF */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	/*
	 * If L0s was requested but the N_FTS field reads back all-ones,
	 * and the link reports both reverse-transmit and reverse-receive
	 * (semantics hw-defined — NOTE(review): looks like a "L0s not
	 * viable on this link" condition, confirm against the register
	 * spec), clear the L0s inactivity timer again, i.e. back off L0s.
	 */
	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}