d7eea75b2c2771550f2ee319c82d018579ff25ec
[sfrench/cifs-2.6.git] / drivers / gpu / drm / radeon / si.c
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24
25 #include <linux/firmware.h>
26 #include <linux/slab.h>
27 #include <linux/module.h>
28
29 #include <drm/drm_pci.h>
30 #include <drm/drm_vblank.h>
31 #include <drm/radeon_drm.h>
32
33 #include "atom.h"
34 #include "clearstate_si.h"
35 #include "radeon.h"
36 #include "radeon_asic.h"
37 #include "radeon_audio.h"
38 #include "radeon_ucode.h"
39 #include "si_blit_shaders.h"
40 #include "sid.h"
41
42
/*
 * Firmware file names recorded for this module through MODULE_FIRMWARE().
 * For each chip name (TAHITI, PITCAIRN, VERDE, OLAND, HAINAN) two sets are
 * listed: an upper-case set (pfp, me, ce, mc, mc2, rlc, smc) and a lower-case
 * set (pfp, me, ce, mc, rlc, smc).  The lower-case sets other than tahiti
 * also list a "_k_smc" image, the hainan group additionally lists
 * "banks_k_2_smc", and "si58_mc" stands alone at the end.
 * NOTE(review): which image is actually requested for which board is decided
 * by code outside this view - confirm there before removing any entry.
 */
43 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
44 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
45 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
46 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
47 MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
48 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
49 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
50
51 MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
52 MODULE_FIRMWARE("radeon/tahiti_me.bin");
53 MODULE_FIRMWARE("radeon/tahiti_ce.bin");
54 MODULE_FIRMWARE("radeon/tahiti_mc.bin");
55 MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
56 MODULE_FIRMWARE("radeon/tahiti_smc.bin");
57
58 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
59 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
60 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
61 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
62 MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
63 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
64 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
65
66 MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
67 MODULE_FIRMWARE("radeon/pitcairn_me.bin");
68 MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
69 MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
70 MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
71 MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
72 MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");
73
74 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
75 MODULE_FIRMWARE("radeon/VERDE_me.bin");
76 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
77 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
78 MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
79 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
80 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
81
82 MODULE_FIRMWARE("radeon/verde_pfp.bin");
83 MODULE_FIRMWARE("radeon/verde_me.bin");
84 MODULE_FIRMWARE("radeon/verde_ce.bin");
85 MODULE_FIRMWARE("radeon/verde_mc.bin");
86 MODULE_FIRMWARE("radeon/verde_rlc.bin");
87 MODULE_FIRMWARE("radeon/verde_smc.bin");
88 MODULE_FIRMWARE("radeon/verde_k_smc.bin");
89
90 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
91 MODULE_FIRMWARE("radeon/OLAND_me.bin");
92 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
93 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
94 MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
95 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
96 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
97
98 MODULE_FIRMWARE("radeon/oland_pfp.bin");
99 MODULE_FIRMWARE("radeon/oland_me.bin");
100 MODULE_FIRMWARE("radeon/oland_ce.bin");
101 MODULE_FIRMWARE("radeon/oland_mc.bin");
102 MODULE_FIRMWARE("radeon/oland_rlc.bin");
103 MODULE_FIRMWARE("radeon/oland_smc.bin");
104 MODULE_FIRMWARE("radeon/oland_k_smc.bin");
105
106 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
107 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
108 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
109 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
110 MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
111 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
112 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
113
114 MODULE_FIRMWARE("radeon/hainan_pfp.bin");
115 MODULE_FIRMWARE("radeon/hainan_me.bin");
116 MODULE_FIRMWARE("radeon/hainan_ce.bin");
117 MODULE_FIRMWARE("radeon/hainan_mc.bin");
118 MODULE_FIRMWARE("radeon/hainan_rlc.bin");
119 MODULE_FIRMWARE("radeon/hainan_smc.bin");
120 MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
121 MODULE_FIRMWARE("radeon/banks_k_2_smc.bin");
122
123 MODULE_FIRMWARE("radeon/si58_mc.bin");
124
/*
 * Forward declarations.
 * The static prototypes (si_*) have internal linkage, i.e. they belong to
 * this file.  The extern prototypes (sumo_*, r600_*, evergreen_*) name
 * functions whose definitions are not in this part of the file.
 * NOTE(review): the extern functions presumably live in the matching sibling
 * source files; declaring them in a shared header would let the compiler
 * check the prototypes against the definitions - confirm and consider moving.
 */
125 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
126 static void si_pcie_gen3_enable(struct radeon_device *rdev);
127 static void si_program_aspm(struct radeon_device *rdev);
128 extern void sumo_rlc_fini(struct radeon_device *rdev);
129 extern int sumo_rlc_init(struct radeon_device *rdev);
130 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
131 extern void r600_ih_ring_fini(struct radeon_device *rdev);
132 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
133 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
134 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
135 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
136 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
137 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
138 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
139                                          bool enable);
140 static void si_init_pg(struct radeon_device *rdev);
141 static void si_init_cg(struct radeon_device *rdev);
142 static void si_fini_pg(struct radeon_device *rdev);
143 static void si_fini_cg(struct radeon_device *rdev);
144 static void si_rlc_stop(struct radeon_device *rdev);
145
146 static const u32 crtc_offsets[] =
147 {
148         EVERGREEN_CRTC0_REGISTER_OFFSET,
149         EVERGREEN_CRTC1_REGISTER_OFFSET,
150         EVERGREEN_CRTC2_REGISTER_OFFSET,
151         EVERGREEN_CRTC3_REGISTER_OFFSET,
152         EVERGREEN_CRTC4_REGISTER_OFFSET,
153         EVERGREEN_CRTC5_REGISTER_OFFSET
154 };
155
156 static const u32 si_disp_int_status[] =
157 {
158         DISP_INTERRUPT_STATUS,
159         DISP_INTERRUPT_STATUS_CONTINUE,
160         DISP_INTERRUPT_STATUS_CONTINUE2,
161         DISP_INTERRUPT_STATUS_CONTINUE3,
162         DISP_INTERRUPT_STATUS_CONTINUE4,
163         DISP_INTERRUPT_STATUS_CONTINUE5
164 };
165
/*
 * Address helpers for the indexed DC_HPD register instances.
 *
 * @x: zero-based instance index.  x == 0 yields the DC_HPD1_* register
 *     itself; each further index adds a stride of 0xc bytes.
 *
 * The argument is parenthesized so that an expression such as "i + 1"
 * is multiplied by the stride as a whole instead of only its last term.
 */
#define DC_HPDx_CONTROL(x)        (DC_HPD1_CONTROL     + ((x) * 0xc))
#define DC_HPDx_INT_CONTROL(x)    (DC_HPD1_INT_CONTROL + ((x) * 0xc))
#define DC_HPDx_INT_STATUS_REG(x) (DC_HPD1_INT_STATUS  + ((x) * 0xc))
169
/*
 * RLC save/restore register list for Verde (per the array name).
 *
 * Layout visible in the data: pairs of words.  The first word of a pair
 * packs a 16-bit selector into bits 31:16 and a register byte offset
 * divided by four into the low bits; the second word is zero.  A single
 * 0x3 word appears part-way through and one extra zero word ends the list.
 *
 * NOTE(review): the meaning of the selector values (0x8000, 0x8001, 0x8040,
 * 0x8041, 0x9c00), of the lone 0x3 word and of the zero words is defined by
 * the consumer of this table, which is outside this view - confirm there
 * before reordering or editing entries.
 */
170 static const u32 verde_rlc_save_restore_register_list[] =
171 {
172         (0x8000 << 16) | (0x98f4 >> 2),
173         0x00000000,
174         (0x8040 << 16) | (0x98f4 >> 2),
175         0x00000000,
176         (0x8000 << 16) | (0xe80 >> 2),
177         0x00000000,
178         (0x8040 << 16) | (0xe80 >> 2),
179         0x00000000,
180         (0x8000 << 16) | (0x89bc >> 2),
181         0x00000000,
182         (0x8040 << 16) | (0x89bc >> 2),
183         0x00000000,
184         (0x8000 << 16) | (0x8c1c >> 2),
185         0x00000000,
186         (0x8040 << 16) | (0x8c1c >> 2),
187         0x00000000,
188         (0x9c00 << 16) | (0x98f0 >> 2),
189         0x00000000,
190         (0x9c00 << 16) | (0xe7c >> 2),
191         0x00000000,
192         (0x8000 << 16) | (0x9148 >> 2),
193         0x00000000,
194         (0x8040 << 16) | (0x9148 >> 2),
195         0x00000000,
196         (0x9c00 << 16) | (0x9150 >> 2),
197         0x00000000,
198         (0x9c00 << 16) | (0x897c >> 2),
199         0x00000000,
200         (0x9c00 << 16) | (0x8d8c >> 2),
201         0x00000000,
202         (0x9c00 << 16) | (0xac54 >> 2),
203         0X00000000,
204         0x3,
205         (0x9c00 << 16) | (0x98f8 >> 2),
206         0x00000000,
207         (0x9c00 << 16) | (0x9910 >> 2),
208         0x00000000,
209         (0x9c00 << 16) | (0x9914 >> 2),
210         0x00000000,
211         (0x9c00 << 16) | (0x9918 >> 2),
212         0x00000000,
213         (0x9c00 << 16) | (0x991c >> 2),
214         0x00000000,
215         (0x9c00 << 16) | (0x9920 >> 2),
216         0x00000000,
217         (0x9c00 << 16) | (0x9924 >> 2),
218         0x00000000,
219         (0x9c00 << 16) | (0x9928 >> 2),
220         0x00000000,
221         (0x9c00 << 16) | (0x992c >> 2),
222         0x00000000,
223         (0x9c00 << 16) | (0x9930 >> 2),
224         0x00000000,
225         (0x9c00 << 16) | (0x9934 >> 2),
226         0x00000000,
227         (0x9c00 << 16) | (0x9938 >> 2),
228         0x00000000,
229         (0x9c00 << 16) | (0x993c >> 2),
230         0x00000000,
231         (0x9c00 << 16) | (0x9940 >> 2),
232         0x00000000,
233         (0x9c00 << 16) | (0x9944 >> 2),
234         0x00000000,
235         (0x9c00 << 16) | (0x9948 >> 2),
236         0x00000000,
237         (0x9c00 << 16) | (0x994c >> 2),
238         0x00000000,
239         (0x9c00 << 16) | (0x9950 >> 2),
240         0x00000000,
241         (0x9c00 << 16) | (0x9954 >> 2),
242         0x00000000,
243         (0x9c00 << 16) | (0x9958 >> 2),
244         0x00000000,
245         (0x9c00 << 16) | (0x995c >> 2),
246         0x00000000,
247         (0x9c00 << 16) | (0x9960 >> 2),
248         0x00000000,
249         (0x9c00 << 16) | (0x9964 >> 2),
250         0x00000000,
251         (0x9c00 << 16) | (0x9968 >> 2),
252         0x00000000,
253         (0x9c00 << 16) | (0x996c >> 2),
254         0x00000000,
255         (0x9c00 << 16) | (0x9970 >> 2),
256         0x00000000,
257         (0x9c00 << 16) | (0x9974 >> 2),
258         0x00000000,
259         (0x9c00 << 16) | (0x9978 >> 2),
260         0x00000000,
261         (0x9c00 << 16) | (0x997c >> 2),
262         0x00000000,
263         (0x9c00 << 16) | (0x9980 >> 2),
264         0x00000000,
265         (0x9c00 << 16) | (0x9984 >> 2),
266         0x00000000,
267         (0x9c00 << 16) | (0x9988 >> 2),
268         0x00000000,
269         (0x9c00 << 16) | (0x998c >> 2),
270         0x00000000,
271         (0x9c00 << 16) | (0x8c00 >> 2),
272         0x00000000,
273         (0x9c00 << 16) | (0x8c14 >> 2),
274         0x00000000,
275         (0x9c00 << 16) | (0x8c04 >> 2),
276         0x00000000,
277         (0x9c00 << 16) | (0x8c08 >> 2),
278         0x00000000,
279         (0x8000 << 16) | (0x9b7c >> 2),
280         0x00000000,
281         (0x8040 << 16) | (0x9b7c >> 2),
282         0x00000000,
283         (0x8000 << 16) | (0xe84 >> 2),
284         0x00000000,
285         (0x8040 << 16) | (0xe84 >> 2),
286         0x00000000,
287         (0x8000 << 16) | (0x89c0 >> 2),
288         0x00000000,
289         (0x8040 << 16) | (0x89c0 >> 2),
290         0x00000000,
291         (0x8000 << 16) | (0x914c >> 2),
292         0x00000000,
293         (0x8040 << 16) | (0x914c >> 2),
294         0x00000000,
295         (0x8000 << 16) | (0x8c20 >> 2),
296         0x00000000,
297         (0x8040 << 16) | (0x8c20 >> 2),
298         0x00000000,
299         (0x8000 << 16) | (0x9354 >> 2),
300         0x00000000,
301         (0x8040 << 16) | (0x9354 >> 2),
302         0x00000000,
303         (0x9c00 << 16) | (0x9060 >> 2),
304         0x00000000,
305         (0x9c00 << 16) | (0x9364 >> 2),
306         0x00000000,
307         (0x9c00 << 16) | (0x9100 >> 2),
308         0x00000000,
309         (0x9c00 << 16) | (0x913c >> 2),
310         0x00000000,
311         (0x8000 << 16) | (0x90e0 >> 2),
312         0x00000000,
313         (0x8000 << 16) | (0x90e4 >> 2),
314         0x00000000,
315         (0x8000 << 16) | (0x90e8 >> 2),
316         0x00000000,
317         (0x8040 << 16) | (0x90e0 >> 2),
318         0x00000000,
319         (0x8040 << 16) | (0x90e4 >> 2),
320         0x00000000,
321         (0x8040 << 16) | (0x90e8 >> 2),
322         0x00000000,
323         (0x9c00 << 16) | (0x8bcc >> 2),
324         0x00000000,
325         (0x9c00 << 16) | (0x8b24 >> 2),
326         0x00000000,
327         (0x9c00 << 16) | (0x88c4 >> 2),
328         0x00000000,
329         (0x9c00 << 16) | (0x8e50 >> 2),
330         0x00000000,
331         (0x9c00 << 16) | (0x8c0c >> 2),
332         0x00000000,
333         (0x9c00 << 16) | (0x8e58 >> 2),
334         0x00000000,
335         (0x9c00 << 16) | (0x8e5c >> 2),
336         0x00000000,
337         (0x9c00 << 16) | (0x9508 >> 2),
338         0x00000000,
339         (0x9c00 << 16) | (0x950c >> 2),
340         0x00000000,
341         (0x9c00 << 16) | (0x9494 >> 2),
342         0x00000000,
343         (0x9c00 << 16) | (0xac0c >> 2),
344         0x00000000,
345         (0x9c00 << 16) | (0xac10 >> 2),
346         0x00000000,
347         (0x9c00 << 16) | (0xac14 >> 2),
348         0x00000000,
349         (0x9c00 << 16) | (0xae00 >> 2),
350         0x00000000,
351         (0x9c00 << 16) | (0xac08 >> 2),
352         0x00000000,
353         (0x9c00 << 16) | (0x88d4 >> 2),
354         0x00000000,
355         (0x9c00 << 16) | (0x88c8 >> 2),
356         0x00000000,
357         (0x9c00 << 16) | (0x88cc >> 2),
358         0x00000000,
359         (0x9c00 << 16) | (0x89b0 >> 2),
360         0x00000000,
361         (0x9c00 << 16) | (0x8b10 >> 2),
362         0x00000000,
363         (0x9c00 << 16) | (0x8a14 >> 2),
364         0x00000000,
365         (0x9c00 << 16) | (0x9830 >> 2),
366         0x00000000,
367         (0x9c00 << 16) | (0x9834 >> 2),
368         0x00000000,
369         (0x9c00 << 16) | (0x9838 >> 2),
370         0x00000000,
371         (0x9c00 << 16) | (0x9a10 >> 2),
372         0x00000000,
373         (0x8000 << 16) | (0x9870 >> 2),
374         0x00000000,
375         (0x8000 << 16) | (0x9874 >> 2),
376         0x00000000,
377         (0x8001 << 16) | (0x9870 >> 2),
378         0x00000000,
379         (0x8001 << 16) | (0x9874 >> 2),
380         0x00000000,
381         (0x8040 << 16) | (0x9870 >> 2),
382         0x00000000,
383         (0x8040 << 16) | (0x9874 >> 2),
384         0x00000000,
385         (0x8041 << 16) | (0x9870 >> 2),
386         0x00000000,
387         (0x8041 << 16) | (0x9874 >> 2),
388         0x00000000,
389         0x00000000
390 };
391
/*
 * Tahiti "golden" RLC register settings (per the array name).
 * Each row holds three u32 words.
 * NOTE(review): presumably { register offset, mask, value } triples applied
 * by a register-programming helper outside this view - confirm against the
 * caller before editing.
 */
392 static const u32 tahiti_golden_rlc_registers[] =
393 {
394         0xc424, 0xffffffff, 0x00601005,
395         0xc47c, 0xffffffff, 0x10104040,
396         0xc488, 0xffffffff, 0x0100000a,
397         0xc314, 0xffffffff, 0x00000800,
398         0xc30c, 0xffffffff, 0x800000f4,
399         0xf4a8, 0xffffffff, 0x00000000
400 };
401
/*
 * Tahiti "golden" register settings (per the array name).
 * Each row holds three u32 words.
 * NOTE(review): presumably { register offset, mask, value } triples applied
 * by a register-programming helper outside this view - confirm against the
 * caller before editing.
 */
402 static const u32 tahiti_golden_registers[] =
403 {
404         0x9a10, 0x00010000, 0x00018208,
405         0x9830, 0xffffffff, 0x00000000,
406         0x9834, 0xf00fffff, 0x00000400,
407         0x9838, 0x0002021c, 0x00020200,
408         0xc78, 0x00000080, 0x00000000,
409         0xd030, 0x000300c0, 0x00800040,
410         0xd830, 0x000300c0, 0x00800040,
411         0x5bb0, 0x000000f0, 0x00000070,
412         0x5bc0, 0x00200000, 0x50100000,
413         0x7030, 0x31000311, 0x00000011,
414         0x277c, 0x00000003, 0x000007ff,
415         0x240c, 0x000007ff, 0x00000000,
416         0x8a14, 0xf000001f, 0x00000007,
417         0x8b24, 0xffffffff, 0x00ffffff,
418         0x8b10, 0x0000ff0f, 0x00000000,
419         0x28a4c, 0x07ffffff, 0x4e000000,
420         0x28350, 0x3f3f3fff, 0x2a00126a,
421         0x30, 0x000000ff, 0x0040,
422         0x34, 0x00000040, 0x00004040,
423         0x9100, 0x07ffffff, 0x03000000,
424         0x8e88, 0x01ff1f3f, 0x00000000,
425         0x8e84, 0x01ff1f3f, 0x00000000,
426         0x9060, 0x0000007f, 0x00000020,
427         0x9508, 0x00010000, 0x00010000,
428         0xac14, 0x00000200, 0x000002fb,
429         0xac10, 0xffffffff, 0x0000543b,
430         0xac0c, 0xffffffff, 0xa9210876,
431         0x88d0, 0xffffffff, 0x000fff40,
432         0x88d4, 0x0000001f, 0x00000010,
433         0x1410, 0x20000000, 0x20fffed8,
434         0x15c0, 0x000c0fc0, 0x000c0400
435 };
436
/*
 * Second Tahiti "golden" register table (per the array name); a single row
 * of three u32 words.
 * NOTE(review): presumably a { register offset, mask, value } triple - the
 * consumer is outside this view, confirm there.
 */
437 static const u32 tahiti_golden_registers2[] =
438 {
439         0xc64, 0x00000001, 0x00000001
440 };
441
/*
 * Pitcairn "golden" RLC register settings (per the array name).
 * Each row holds three u32 words.
 * NOTE(review): presumably { register offset, mask, value } triples applied
 * by a register-programming helper outside this view - confirm against the
 * caller before editing.
 */
442 static const u32 pitcairn_golden_rlc_registers[] =
443 {
444         0xc424, 0xffffffff, 0x00601004,
445         0xc47c, 0xffffffff, 0x10102020,
446         0xc488, 0xffffffff, 0x01000020,
447         0xc314, 0xffffffff, 0x00000800,
448         0xc30c, 0xffffffff, 0x800000a4
449 };
450
/*
 * Pitcairn "golden" register settings (per the array name).
 * Each row holds three u32 words.
 * NOTE(review): presumably { register offset, mask, value } triples applied
 * by a register-programming helper outside this view - confirm against the
 * caller before editing.
 */
451 static const u32 pitcairn_golden_registers[] =
452 {
453         0x9a10, 0x00010000, 0x00018208,
454         0x9830, 0xffffffff, 0x00000000,
455         0x9834, 0xf00fffff, 0x00000400,
456         0x9838, 0x0002021c, 0x00020200,
457         0xc78, 0x00000080, 0x00000000,
458         0xd030, 0x000300c0, 0x00800040,
459         0xd830, 0x000300c0, 0x00800040,
460         0x5bb0, 0x000000f0, 0x00000070,
461         0x5bc0, 0x00200000, 0x50100000,
462         0x7030, 0x31000311, 0x00000011,
463         0x2ae4, 0x00073ffe, 0x000022a2,
464         0x240c, 0x000007ff, 0x00000000,
465         0x8a14, 0xf000001f, 0x00000007,
466         0x8b24, 0xffffffff, 0x00ffffff,
467         0x8b10, 0x0000ff0f, 0x00000000,
468         0x28a4c, 0x07ffffff, 0x4e000000,
469         0x28350, 0x3f3f3fff, 0x2a00126a,
470         0x30, 0x000000ff, 0x0040,
471         0x34, 0x00000040, 0x00004040,
472         0x9100, 0x07ffffff, 0x03000000,
473         0x9060, 0x0000007f, 0x00000020,
474         0x9508, 0x00010000, 0x00010000,
475         0xac14, 0x000003ff, 0x000000f7,
476         0xac10, 0xffffffff, 0x00000000,
477         0xac0c, 0xffffffff, 0x32761054,
478         0x88d4, 0x0000001f, 0x00000010,
479         0x15c0, 0x000c0fc0, 0x000c0400
480 };
481
/*
 * Verde "golden" RLC register settings (per the array name).
 * Each row holds three u32 words.
 * NOTE(review): presumably { register offset, mask, value } triples applied
 * by a register-programming helper outside this view - confirm against the
 * caller before editing.
 */
482 static const u32 verde_golden_rlc_registers[] =
483 {
484         0xc424, 0xffffffff, 0x033f1005,
485         0xc47c, 0xffffffff, 0x10808020,
486         0xc488, 0xffffffff, 0x00800008,
487         0xc314, 0xffffffff, 0x00001000,
488         0xc30c, 0xffffffff, 0x80010014
489 };
490
/*
 * Verde "golden" register settings (per the array name).
 * Each row holds three u32 words.  Several rows are repeated verbatim two
 * or three times in a row.
 * NOTE(review): presumably { register offset, mask, value } triples applied
 * by a register-programming helper outside this view; whether the repeated
 * rows are intentional cannot be told from here - confirm before
 * de-duplicating.
 */
491 static const u32 verde_golden_registers[] =
492 {
493         0x9a10, 0x00010000, 0x00018208,
494         0x9830, 0xffffffff, 0x00000000,
495         0x9834, 0xf00fffff, 0x00000400,
496         0x9838, 0x0002021c, 0x00020200,
497         0xc78, 0x00000080, 0x00000000,
498         0xd030, 0x000300c0, 0x00800040,
499         0xd030, 0x000300c0, 0x00800040,
500         0xd830, 0x000300c0, 0x00800040,
501         0xd830, 0x000300c0, 0x00800040,
502         0x5bb0, 0x000000f0, 0x00000070,
503         0x5bc0, 0x00200000, 0x50100000,
504         0x7030, 0x31000311, 0x00000011,
505         0x2ae4, 0x00073ffe, 0x000022a2,
506         0x2ae4, 0x00073ffe, 0x000022a2,
507         0x2ae4, 0x00073ffe, 0x000022a2,
508         0x240c, 0x000007ff, 0x00000000,
509         0x240c, 0x000007ff, 0x00000000,
510         0x240c, 0x000007ff, 0x00000000,
511         0x8a14, 0xf000001f, 0x00000007,
512         0x8a14, 0xf000001f, 0x00000007,
513         0x8a14, 0xf000001f, 0x00000007,
514         0x8b24, 0xffffffff, 0x00ffffff,
515         0x8b10, 0x0000ff0f, 0x00000000,
516         0x28a4c, 0x07ffffff, 0x4e000000,
517         0x28350, 0x3f3f3fff, 0x0000124a,
518         0x28350, 0x3f3f3fff, 0x0000124a,
519         0x28350, 0x3f3f3fff, 0x0000124a,
520         0x30, 0x000000ff, 0x0040,
521         0x34, 0x00000040, 0x00004040,
522         0x9100, 0x07ffffff, 0x03000000,
523         0x9100, 0x07ffffff, 0x03000000,
524         0x8e88, 0x01ff1f3f, 0x00000000,
525         0x8e88, 0x01ff1f3f, 0x00000000,
526         0x8e88, 0x01ff1f3f, 0x00000000,
527         0x8e84, 0x01ff1f3f, 0x00000000,
528         0x8e84, 0x01ff1f3f, 0x00000000,
529         0x8e84, 0x01ff1f3f, 0x00000000,
530         0x9060, 0x0000007f, 0x00000020,
531         0x9508, 0x00010000, 0x00010000,
532         0xac14, 0x000003ff, 0x00000003,
533         0xac14, 0x000003ff, 0x00000003,
534         0xac14, 0x000003ff, 0x00000003,
535         0xac10, 0xffffffff, 0x00000000,
536         0xac10, 0xffffffff, 0x00000000,
537         0xac10, 0xffffffff, 0x00000000,
538         0xac0c, 0xffffffff, 0x00001032,
539         0xac0c, 0xffffffff, 0x00001032,
540         0xac0c, 0xffffffff, 0x00001032,
541         0x88d4, 0x0000001f, 0x00000010,
542         0x88d4, 0x0000001f, 0x00000010,
543         0x88d4, 0x0000001f, 0x00000010,
544         0x15c0, 0x000c0fc0, 0x000c0400
545 };
546
/*
 * Oland "golden" RLC register settings (per the array name).
 * Each row holds three u32 words.
 * NOTE(review): presumably { register offset, mask, value } triples applied
 * by a register-programming helper outside this view - confirm against the
 * caller before editing.
 */
547 static const u32 oland_golden_rlc_registers[] =
548 {
549         0xc424, 0xffffffff, 0x00601005,
550         0xc47c, 0xffffffff, 0x10104040,
551         0xc488, 0xffffffff, 0x0100000a,
552         0xc314, 0xffffffff, 0x00000800,
553         0xc30c, 0xffffffff, 0x800000f4
554 };
555
/*
 * Oland "golden" register settings (per the array name).
 * Each row holds three u32 words.
 * NOTE(review): presumably { register offset, mask, value } triples applied
 * by a register-programming helper outside this view - confirm against the
 * caller before editing.
 */
556 static const u32 oland_golden_registers[] =
557 {
558         0x9a10, 0x00010000, 0x00018208,
559         0x9830, 0xffffffff, 0x00000000,
560         0x9834, 0xf00fffff, 0x00000400,
561         0x9838, 0x0002021c, 0x00020200,
562         0xc78, 0x00000080, 0x00000000,
563         0xd030, 0x000300c0, 0x00800040,
564         0xd830, 0x000300c0, 0x00800040,
565         0x5bb0, 0x000000f0, 0x00000070,
566         0x5bc0, 0x00200000, 0x50100000,
567         0x7030, 0x31000311, 0x00000011,
568         0x2ae4, 0x00073ffe, 0x000022a2,
569         0x240c, 0x000007ff, 0x00000000,
570         0x8a14, 0xf000001f, 0x00000007,
571         0x8b24, 0xffffffff, 0x00ffffff,
572         0x8b10, 0x0000ff0f, 0x00000000,
573         0x28a4c, 0x07ffffff, 0x4e000000,
574         0x28350, 0x3f3f3fff, 0x00000082,
575         0x30, 0x000000ff, 0x0040,
576         0x34, 0x00000040, 0x00004040,
577         0x9100, 0x07ffffff, 0x03000000,
578         0x9060, 0x0000007f, 0x00000020,
579         0x9508, 0x00010000, 0x00010000,
580         0xac14, 0x000003ff, 0x000000f3,
581         0xac10, 0xffffffff, 0x00000000,
582         0xac0c, 0xffffffff, 0x00003210,
583         0x88d4, 0x0000001f, 0x00000010,
584         0x15c0, 0x000c0fc0, 0x000c0400
585 };
586
/*
 * Hainan "golden" register settings (per the array name).
 * Each row holds three u32 words.
 * NOTE(review): presumably { register offset, mask, value } triples applied
 * by a register-programming helper outside this view - confirm against the
 * caller before editing.
 */
587 static const u32 hainan_golden_registers[] =
588 {
589         0x9a10, 0x00010000, 0x00018208,
590         0x9830, 0xffffffff, 0x00000000,
591         0x9834, 0xf00fffff, 0x00000400,
592         0x9838, 0x0002021c, 0x00020200,
593         0xd0c0, 0xff000fff, 0x00000100,
594         0xd030, 0x000300c0, 0x00800040,
595         0xd8c0, 0xff000fff, 0x00000100,
596         0xd830, 0x000300c0, 0x00800040,
597         0x2ae4, 0x00073ffe, 0x000022a2,
598         0x240c, 0x000007ff, 0x00000000,
599         0x8a14, 0xf000001f, 0x00000007,
600         0x8b24, 0xffffffff, 0x00ffffff,
601         0x8b10, 0x0000ff0f, 0x00000000,
602         0x28a4c, 0x07ffffff, 0x4e000000,
603         0x28350, 0x3f3f3fff, 0x00000000,
604         0x30, 0x000000ff, 0x0040,
605         0x34, 0x00000040, 0x00004040,
606         0x9100, 0x03e00000, 0x03600000,
607         0x9060, 0x0000007f, 0x00000020,
608         0x9508, 0x00010000, 0x00010000,
609         0xac14, 0x000003ff, 0x000000f1,
610         0xac10, 0xffffffff, 0x00000000,
611         0xac0c, 0xffffffff, 0x00003210,
612         0x88d4, 0x0000001f, 0x00000010,
613         0x15c0, 0x000c0fc0, 0x000c0400
614 };
615
/*
 * Second Hainan "golden" register table (per the array name); a single row
 * of three u32 words.
 * NOTE(review): presumably a { register offset, mask, value } triple - the
 * consumer is outside this view, confirm there.
 */
616 static const u32 hainan_golden_registers2[] =
617 {
618         0x98f8, 0xffffffff, 0x02010001
619 };
620
/*
 * Tahiti MGCG/CGCG initialisation table (per the array name).
 * Each row holds three u32 words.  Rows for offsets 0x9160 through 0x929c
 * form one contiguous run in steps of four.
 * NOTE(review): presumably { register offset, mask, value } triples applied
 * in order by a register-programming helper outside this view; "mgcg"/"cgcg"
 * presumably refer to clock-gating modes - confirm against the caller before
 * reordering or editing entries.
 */
621 static const u32 tahiti_mgcg_cgcg_init[] =
622 {
623         0xc400, 0xffffffff, 0xfffffffc,
624         0x802c, 0xffffffff, 0xe0000000,
625         0x9a60, 0xffffffff, 0x00000100,
626         0x92a4, 0xffffffff, 0x00000100,
627         0xc164, 0xffffffff, 0x00000100,
628         0x9774, 0xffffffff, 0x00000100,
629         0x8984, 0xffffffff, 0x06000100,
630         0x8a18, 0xffffffff, 0x00000100,
631         0x92a0, 0xffffffff, 0x00000100,
632         0xc380, 0xffffffff, 0x00000100,
633         0x8b28, 0xffffffff, 0x00000100,
634         0x9144, 0xffffffff, 0x00000100,
635         0x8d88, 0xffffffff, 0x00000100,
636         0x8d8c, 0xffffffff, 0x00000100,
637         0x9030, 0xffffffff, 0x00000100,
638         0x9034, 0xffffffff, 0x00000100,
639         0x9038, 0xffffffff, 0x00000100,
640         0x903c, 0xffffffff, 0x00000100,
641         0xad80, 0xffffffff, 0x00000100,
642         0xac54, 0xffffffff, 0x00000100,
643         0x897c, 0xffffffff, 0x06000100,
644         0x9868, 0xffffffff, 0x00000100,
645         0x9510, 0xffffffff, 0x00000100,
646         0xaf04, 0xffffffff, 0x00000100,
647         0xae04, 0xffffffff, 0x00000100,
648         0x949c, 0xffffffff, 0x00000100,
649         0x802c, 0xffffffff, 0xe0000000,
650         0x9160, 0xffffffff, 0x00010000,
651         0x9164, 0xffffffff, 0x00030002,
652         0x9168, 0xffffffff, 0x00040007,
653         0x916c, 0xffffffff, 0x00060005,
654         0x9170, 0xffffffff, 0x00090008,
655         0x9174, 0xffffffff, 0x00020001,
656         0x9178, 0xffffffff, 0x00040003,
657         0x917c, 0xffffffff, 0x00000007,
658         0x9180, 0xffffffff, 0x00060005,
659         0x9184, 0xffffffff, 0x00090008,
660         0x9188, 0xffffffff, 0x00030002,
661         0x918c, 0xffffffff, 0x00050004,
662         0x9190, 0xffffffff, 0x00000008,
663         0x9194, 0xffffffff, 0x00070006,
664         0x9198, 0xffffffff, 0x000a0009,
665         0x919c, 0xffffffff, 0x00040003,
666         0x91a0, 0xffffffff, 0x00060005,
667         0x91a4, 0xffffffff, 0x00000009,
668         0x91a8, 0xffffffff, 0x00080007,
669         0x91ac, 0xffffffff, 0x000b000a,
670         0x91b0, 0xffffffff, 0x00050004,
671         0x91b4, 0xffffffff, 0x00070006,
672         0x91b8, 0xffffffff, 0x0008000b,
673         0x91bc, 0xffffffff, 0x000a0009,
674         0x91c0, 0xffffffff, 0x000d000c,
675         0x91c4, 0xffffffff, 0x00060005,
676         0x91c8, 0xffffffff, 0x00080007,
677         0x91cc, 0xffffffff, 0x0000000b,
678         0x91d0, 0xffffffff, 0x000a0009,
679         0x91d4, 0xffffffff, 0x000d000c,
680         0x91d8, 0xffffffff, 0x00070006,
681         0x91dc, 0xffffffff, 0x00090008,
682         0x91e0, 0xffffffff, 0x0000000c,
683         0x91e4, 0xffffffff, 0x000b000a,
684         0x91e8, 0xffffffff, 0x000e000d,
685         0x91ec, 0xffffffff, 0x00080007,
686         0x91f0, 0xffffffff, 0x000a0009,
687         0x91f4, 0xffffffff, 0x0000000d,
688         0x91f8, 0xffffffff, 0x000c000b,
689         0x91fc, 0xffffffff, 0x000f000e,
690         0x9200, 0xffffffff, 0x00090008,
691         0x9204, 0xffffffff, 0x000b000a,
692         0x9208, 0xffffffff, 0x000c000f,
693         0x920c, 0xffffffff, 0x000e000d,
694         0x9210, 0xffffffff, 0x00110010,
695         0x9214, 0xffffffff, 0x000a0009,
696         0x9218, 0xffffffff, 0x000c000b,
697         0x921c, 0xffffffff, 0x0000000f,
698         0x9220, 0xffffffff, 0x000e000d,
699         0x9224, 0xffffffff, 0x00110010,
700         0x9228, 0xffffffff, 0x000b000a,
701         0x922c, 0xffffffff, 0x000d000c,
702         0x9230, 0xffffffff, 0x00000010,
703         0x9234, 0xffffffff, 0x000f000e,
704         0x9238, 0xffffffff, 0x00120011,
705         0x923c, 0xffffffff, 0x000c000b,
706         0x9240, 0xffffffff, 0x000e000d,
707         0x9244, 0xffffffff, 0x00000011,
708         0x9248, 0xffffffff, 0x0010000f,
709         0x924c, 0xffffffff, 0x00130012,
710         0x9250, 0xffffffff, 0x000d000c,
711         0x9254, 0xffffffff, 0x000f000e,
712         0x9258, 0xffffffff, 0x00100013,
713         0x925c, 0xffffffff, 0x00120011,
714         0x9260, 0xffffffff, 0x00150014,
715         0x9264, 0xffffffff, 0x000e000d,
716         0x9268, 0xffffffff, 0x0010000f,
717         0x926c, 0xffffffff, 0x00000013,
718         0x9270, 0xffffffff, 0x00120011,
719         0x9274, 0xffffffff, 0x00150014,
720         0x9278, 0xffffffff, 0x000f000e,
721         0x927c, 0xffffffff, 0x00110010,
722         0x9280, 0xffffffff, 0x00000014,
723         0x9284, 0xffffffff, 0x00130012,
724         0x9288, 0xffffffff, 0x00160015,
725         0x928c, 0xffffffff, 0x0010000f,
726         0x9290, 0xffffffff, 0x00120011,
727         0x9294, 0xffffffff, 0x00000015,
728         0x9298, 0xffffffff, 0x00140013,
729         0x929c, 0xffffffff, 0x00170016,
730         0x9150, 0xffffffff, 0x96940200,
731         0x8708, 0xffffffff, 0x00900100,
732         0xc478, 0xffffffff, 0x00000080,
733         0xc404, 0xffffffff, 0x0020003f,
734         0x30, 0xffffffff, 0x0000001c,
735         0x34, 0x000f0000, 0x000f0000,
736         0x160c, 0xffffffff, 0x00000100,
737         0x1024, 0xffffffff, 0x00000100,
738         0x102c, 0x00000101, 0x00000000,
739         0x20a8, 0xffffffff, 0x00000104,
740         0x264c, 0x000c0000, 0x000c0000,
741         0x2648, 0x000c0000, 0x000c0000,
742         0x55e4, 0xff000fff, 0x00000100,
743         0x55e8, 0x00000001, 0x00000001,
744         0x2f50, 0x00000001, 0x00000001,
745         0x30cc, 0xc0000fff, 0x00000104,
746         0xc1e4, 0x00000001, 0x00000001,
747         0xd0c0, 0xfffffff0, 0x00000100,
748         0xd8c0, 0xfffffff0, 0x00000100
749 };
750
/*
 * Pitcairn MGCG/CGCG initialisation table (per the array name).
 * Each row holds three u32 words.  The run of offsets starting at 0x9160
 * goes up to 0x91c0, then resumes at 0x9200 and ends at 0x9260; offsets
 * 0x91c4 through 0x91fc are not listed.
 * NOTE(review): presumably { register offset, mask, value } triples applied
 * in order by a register-programming helper outside this view; "mgcg"/"cgcg"
 * presumably refer to clock-gating modes, and the gap in the offset run is
 * presumably intentional for this chip - confirm before editing.
 */
751 static const u32 pitcairn_mgcg_cgcg_init[] =
752 {
753         0xc400, 0xffffffff, 0xfffffffc,
754         0x802c, 0xffffffff, 0xe0000000,
755         0x9a60, 0xffffffff, 0x00000100,
756         0x92a4, 0xffffffff, 0x00000100,
757         0xc164, 0xffffffff, 0x00000100,
758         0x9774, 0xffffffff, 0x00000100,
759         0x8984, 0xffffffff, 0x06000100,
760         0x8a18, 0xffffffff, 0x00000100,
761         0x92a0, 0xffffffff, 0x00000100,
762         0xc380, 0xffffffff, 0x00000100,
763         0x8b28, 0xffffffff, 0x00000100,
764         0x9144, 0xffffffff, 0x00000100,
765         0x8d88, 0xffffffff, 0x00000100,
766         0x8d8c, 0xffffffff, 0x00000100,
767         0x9030, 0xffffffff, 0x00000100,
768         0x9034, 0xffffffff, 0x00000100,
769         0x9038, 0xffffffff, 0x00000100,
770         0x903c, 0xffffffff, 0x00000100,
771         0xad80, 0xffffffff, 0x00000100,
772         0xac54, 0xffffffff, 0x00000100,
773         0x897c, 0xffffffff, 0x06000100,
774         0x9868, 0xffffffff, 0x00000100,
775         0x9510, 0xffffffff, 0x00000100,
776         0xaf04, 0xffffffff, 0x00000100,
777         0xae04, 0xffffffff, 0x00000100,
778         0x949c, 0xffffffff, 0x00000100,
779         0x802c, 0xffffffff, 0xe0000000,
780         0x9160, 0xffffffff, 0x00010000,
781         0x9164, 0xffffffff, 0x00030002,
782         0x9168, 0xffffffff, 0x00040007,
783         0x916c, 0xffffffff, 0x00060005,
784         0x9170, 0xffffffff, 0x00090008,
785         0x9174, 0xffffffff, 0x00020001,
786         0x9178, 0xffffffff, 0x00040003,
787         0x917c, 0xffffffff, 0x00000007,
788         0x9180, 0xffffffff, 0x00060005,
789         0x9184, 0xffffffff, 0x00090008,
790         0x9188, 0xffffffff, 0x00030002,
791         0x918c, 0xffffffff, 0x00050004,
792         0x9190, 0xffffffff, 0x00000008,
793         0x9194, 0xffffffff, 0x00070006,
794         0x9198, 0xffffffff, 0x000a0009,
795         0x919c, 0xffffffff, 0x00040003,
796         0x91a0, 0xffffffff, 0x00060005,
797         0x91a4, 0xffffffff, 0x00000009,
798         0x91a8, 0xffffffff, 0x00080007,
799         0x91ac, 0xffffffff, 0x000b000a,
800         0x91b0, 0xffffffff, 0x00050004,
801         0x91b4, 0xffffffff, 0x00070006,
802         0x91b8, 0xffffffff, 0x0008000b,
803         0x91bc, 0xffffffff, 0x000a0009,
804         0x91c0, 0xffffffff, 0x000d000c,
805         0x9200, 0xffffffff, 0x00090008,
806         0x9204, 0xffffffff, 0x000b000a,
807         0x9208, 0xffffffff, 0x000c000f,
808         0x920c, 0xffffffff, 0x000e000d,
809         0x9210, 0xffffffff, 0x00110010,
810         0x9214, 0xffffffff, 0x000a0009,
811         0x9218, 0xffffffff, 0x000c000b,
812         0x921c, 0xffffffff, 0x0000000f,
813         0x9220, 0xffffffff, 0x000e000d,
814         0x9224, 0xffffffff, 0x00110010,
815         0x9228, 0xffffffff, 0x000b000a,
816         0x922c, 0xffffffff, 0x000d000c,
817         0x9230, 0xffffffff, 0x00000010,
818         0x9234, 0xffffffff, 0x000f000e,
819         0x9238, 0xffffffff, 0x00120011,
820         0x923c, 0xffffffff, 0x000c000b,
821         0x9240, 0xffffffff, 0x000e000d,
822         0x9244, 0xffffffff, 0x00000011,
823         0x9248, 0xffffffff, 0x0010000f,
824         0x924c, 0xffffffff, 0x00130012,
825         0x9250, 0xffffffff, 0x000d000c,
826         0x9254, 0xffffffff, 0x000f000e,
827         0x9258, 0xffffffff, 0x00100013,
828         0x925c, 0xffffffff, 0x00120011,
829         0x9260, 0xffffffff, 0x00150014,
830         0x9150, 0xffffffff, 0x96940200,
831         0x8708, 0xffffffff, 0x00900100,
832         0xc478, 0xffffffff, 0x00000080,
833         0xc404, 0xffffffff, 0x0020003f,
834         0x30, 0xffffffff, 0x0000001c,
835         0x34, 0x000f0000, 0x000f0000,
836         0x160c, 0xffffffff, 0x00000100,
837         0x1024, 0xffffffff, 0x00000100,
838         0x102c, 0x00000101, 0x00000000,
839         0x20a8, 0xffffffff, 0x00000104,
840         0x55e4, 0xff000fff, 0x00000100,
841         0x55e8, 0x00000001, 0x00000001,
842         0x2f50, 0x00000001, 0x00000001,
843         0x30cc, 0xc0000fff, 0x00000104,
844         0xc1e4, 0x00000001, 0x00000001,
845         0xd0c0, 0xfffffff0, 0x00000100,
846         0xd8c0, 0xfffffff0, 0x00000100
847 };
848
/*
 * Register table for Verde.  si_init_golden_registers() passes this array,
 * together with its ARRAY_SIZE(), to radeon_program_register_sequence() when
 * rdev->family is CHIP_VERDE.
 *
 * Every row holds three u32 words.
 * NOTE(review): presumably {register offset, mask, value} triples as
 * consumed by radeon_program_register_sequence() - confirm against that
 * helper.  Offset 0x802c appears twice, so row order may matter.
 */
static const u32 verde_mgcg_cgcg_init[] =
{
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x9200, 0xffffffff, 0x00090008,
        0x9204, 0xffffffff, 0x000b000a,
        0x9208, 0xffffffff, 0x000c000f,
        0x920c, 0xffffffff, 0x000e000d,
        0x9210, 0xffffffff, 0x00110010,
        0x9214, 0xffffffff, 0x000a0009,
        0x9218, 0xffffffff, 0x000c000b,
        0x921c, 0xffffffff, 0x0000000f,
        0x9220, 0xffffffff, 0x000e000d,
        0x9224, 0xffffffff, 0x00110010,
        0x9228, 0xffffffff, 0x000b000a,
        0x922c, 0xffffffff, 0x000d000c,
        0x9230, 0xffffffff, 0x00000010,
        0x9234, 0xffffffff, 0x000f000e,
        0x9238, 0xffffffff, 0x00120011,
        0x923c, 0xffffffff, 0x000c000b,
        0x9240, 0xffffffff, 0x000e000d,
        0x9244, 0xffffffff, 0x00000011,
        0x9248, 0xffffffff, 0x0010000f,
        0x924c, 0xffffffff, 0x00130012,
        0x9250, 0xffffffff, 0x000d000c,
        0x9254, 0xffffffff, 0x000f000e,
        0x9258, 0xffffffff, 0x00100013,
        0x925c, 0xffffffff, 0x00120011,
        0x9260, 0xffffffff, 0x00150014,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x102c, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x264c, 0x000c0000, 0x000c0000,
        0x2648, 0x000c0000, 0x000c0000,
        0x55e4, 0xff000fff, 0x00000100,
        0x55e8, 0x00000001, 0x00000001,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100
};
948
/*
 * Register table for Oland.  si_init_golden_registers() passes this array,
 * together with its ARRAY_SIZE(), to radeon_program_register_sequence() when
 * rdev->family is CHIP_OLAND.
 *
 * Every row holds three u32 words.
 * NOTE(review): presumably {register offset, mask, value} triples as
 * consumed by radeon_program_register_sequence() - confirm against that
 * helper.  Offset 0x802c appears twice, so row order may matter.
 */
static const u32 oland_mgcg_cgcg_init[] =
{
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x91c4, 0xffffffff, 0x00060005,
        0x91c8, 0xffffffff, 0x00080007,
        0x91cc, 0xffffffff, 0x0000000b,
        0x91d0, 0xffffffff, 0x000a0009,
        0x91d4, 0xffffffff, 0x000d000c,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x102c, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x264c, 0x000c0000, 0x000c0000,
        0x2648, 0x000c0000, 0x000c0000,
        0x55e4, 0xff000fff, 0x00000100,
        0x55e8, 0x00000001, 0x00000001,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100
};
1028
/*
 * Register table for Hainan.  si_init_golden_registers() passes this array,
 * together with its ARRAY_SIZE(), to radeon_program_register_sequence() when
 * rdev->family is CHIP_HAINAN.
 *
 * Every row holds three u32 words.
 * NOTE(review): presumably {register offset, mask, value} triples as
 * consumed by radeon_program_register_sequence() - confirm against that
 * helper.  Offset 0x802c appears twice, so row order may matter.
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x91c4, 0xffffffff, 0x00060005,
        0x91c8, 0xffffffff, 0x00080007,
        0x91cc, 0xffffffff, 0x0000000b,
        0x91d0, 0xffffffff, 0x000a0009,
        0x91d4, 0xffffffff, 0x000d000c,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x20a8, 0xffffffff, 0x00000104,
        0x264c, 0x000c0000, 0x000c0000,
        0x2648, 0x000c0000, 0x000c0000,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100
};
1105
1106 static u32 verde_pg_init[] =
1107 {
1108         0x353c, 0xffffffff, 0x40000,
1109         0x3538, 0xffffffff, 0x200010ff,
1110         0x353c, 0xffffffff, 0x0,
1111         0x353c, 0xffffffff, 0x0,
1112         0x353c, 0xffffffff, 0x0,
1113         0x353c, 0xffffffff, 0x0,
1114         0x353c, 0xffffffff, 0x0,
1115         0x353c, 0xffffffff, 0x7007,
1116         0x3538, 0xffffffff, 0x300010ff,
1117         0x353c, 0xffffffff, 0x0,
1118         0x353c, 0xffffffff, 0x0,
1119         0x353c, 0xffffffff, 0x0,
1120         0x353c, 0xffffffff, 0x0,
1121         0x353c, 0xffffffff, 0x0,
1122         0x353c, 0xffffffff, 0x400000,
1123         0x3538, 0xffffffff, 0x100010ff,
1124         0x353c, 0xffffffff, 0x0,
1125         0x353c, 0xffffffff, 0x0,
1126         0x353c, 0xffffffff, 0x0,
1127         0x353c, 0xffffffff, 0x0,
1128         0x353c, 0xffffffff, 0x0,
1129         0x353c, 0xffffffff, 0x120200,
1130         0x3538, 0xffffffff, 0x500010ff,
1131         0x353c, 0xffffffff, 0x0,
1132         0x353c, 0xffffffff, 0x0,
1133         0x353c, 0xffffffff, 0x0,
1134         0x353c, 0xffffffff, 0x0,
1135         0x353c, 0xffffffff, 0x0,
1136         0x353c, 0xffffffff, 0x1e1e16,
1137         0x3538, 0xffffffff, 0x600010ff,
1138         0x353c, 0xffffffff, 0x0,
1139         0x353c, 0xffffffff, 0x0,
1140         0x353c, 0xffffffff, 0x0,
1141         0x353c, 0xffffffff, 0x0,
1142         0x353c, 0xffffffff, 0x0,
1143         0x353c, 0xffffffff, 0x171f1e,
1144         0x3538, 0xffffffff, 0x700010ff,
1145         0x353c, 0xffffffff, 0x0,
1146         0x353c, 0xffffffff, 0x0,
1147         0x353c, 0xffffffff, 0x0,
1148         0x353c, 0xffffffff, 0x0,
1149         0x353c, 0xffffffff, 0x0,
1150         0x353c, 0xffffffff, 0x0,
1151         0x3538, 0xffffffff, 0x9ff,
1152         0x3500, 0xffffffff, 0x0,
1153         0x3504, 0xffffffff, 0x10000800,
1154         0x3504, 0xffffffff, 0xf,
1155         0x3504, 0xffffffff, 0xf,
1156         0x3500, 0xffffffff, 0x4,
1157         0x3504, 0xffffffff, 0x1000051e,
1158         0x3504, 0xffffffff, 0xffff,
1159         0x3504, 0xffffffff, 0xffff,
1160         0x3500, 0xffffffff, 0x8,
1161         0x3504, 0xffffffff, 0x80500,
1162         0x3500, 0xffffffff, 0x12,
1163         0x3504, 0xffffffff, 0x9050c,
1164         0x3500, 0xffffffff, 0x1d,
1165         0x3504, 0xffffffff, 0xb052c,
1166         0x3500, 0xffffffff, 0x2a,
1167         0x3504, 0xffffffff, 0x1053e,
1168         0x3500, 0xffffffff, 0x2d,
1169         0x3504, 0xffffffff, 0x10546,
1170         0x3500, 0xffffffff, 0x30,
1171         0x3504, 0xffffffff, 0xa054e,
1172         0x3500, 0xffffffff, 0x3c,
1173         0x3504, 0xffffffff, 0x1055f,
1174         0x3500, 0xffffffff, 0x3f,
1175         0x3504, 0xffffffff, 0x10567,
1176         0x3500, 0xffffffff, 0x42,
1177         0x3504, 0xffffffff, 0x1056f,
1178         0x3500, 0xffffffff, 0x45,
1179         0x3504, 0xffffffff, 0x10572,
1180         0x3500, 0xffffffff, 0x48,
1181         0x3504, 0xffffffff, 0x20575,
1182         0x3500, 0xffffffff, 0x4c,
1183         0x3504, 0xffffffff, 0x190801,
1184         0x3500, 0xffffffff, 0x67,
1185         0x3504, 0xffffffff, 0x1082a,
1186         0x3500, 0xffffffff, 0x6a,
1187         0x3504, 0xffffffff, 0x1b082d,
1188         0x3500, 0xffffffff, 0x87,
1189         0x3504, 0xffffffff, 0x310851,
1190         0x3500, 0xffffffff, 0xba,
1191         0x3504, 0xffffffff, 0x891,
1192         0x3500, 0xffffffff, 0xbc,
1193         0x3504, 0xffffffff, 0x893,
1194         0x3500, 0xffffffff, 0xbe,
1195         0x3504, 0xffffffff, 0x20895,
1196         0x3500, 0xffffffff, 0xc2,
1197         0x3504, 0xffffffff, 0x20899,
1198         0x3500, 0xffffffff, 0xc6,
1199         0x3504, 0xffffffff, 0x2089d,
1200         0x3500, 0xffffffff, 0xca,
1201         0x3504, 0xffffffff, 0x8a1,
1202         0x3500, 0xffffffff, 0xcc,
1203         0x3504, 0xffffffff, 0x8a3,
1204         0x3500, 0xffffffff, 0xce,
1205         0x3504, 0xffffffff, 0x308a5,
1206         0x3500, 0xffffffff, 0xd3,
1207         0x3504, 0xffffffff, 0x6d08cd,
1208         0x3500, 0xffffffff, 0x142,
1209         0x3504, 0xffffffff, 0x2000095a,
1210         0x3504, 0xffffffff, 0x1,
1211         0x3500, 0xffffffff, 0x144,
1212         0x3504, 0xffffffff, 0x301f095b,
1213         0x3500, 0xffffffff, 0x165,
1214         0x3504, 0xffffffff, 0xc094d,
1215         0x3500, 0xffffffff, 0x173,
1216         0x3504, 0xffffffff, 0xf096d,
1217         0x3500, 0xffffffff, 0x184,
1218         0x3504, 0xffffffff, 0x15097f,
1219         0x3500, 0xffffffff, 0x19b,
1220         0x3504, 0xffffffff, 0xc0998,
1221         0x3500, 0xffffffff, 0x1a9,
1222         0x3504, 0xffffffff, 0x409a7,
1223         0x3500, 0xffffffff, 0x1af,
1224         0x3504, 0xffffffff, 0xcdc,
1225         0x3500, 0xffffffff, 0x1b1,
1226         0x3504, 0xffffffff, 0x800,
1227         0x3508, 0xffffffff, 0x6c9b2000,
1228         0x3510, 0xfc00, 0x2000,
1229         0x3544, 0xffffffff, 0xfc0,
1230         0x28d4, 0x00000100, 0x100
1231 };
1232
1233 static void si_init_golden_registers(struct radeon_device *rdev)
1234 {
1235         switch (rdev->family) {
1236         case CHIP_TAHITI:
1237                 radeon_program_register_sequence(rdev,
1238                                                  tahiti_golden_registers,
1239                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
1240                 radeon_program_register_sequence(rdev,
1241                                                  tahiti_golden_rlc_registers,
1242                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1243                 radeon_program_register_sequence(rdev,
1244                                                  tahiti_mgcg_cgcg_init,
1245                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1246                 radeon_program_register_sequence(rdev,
1247                                                  tahiti_golden_registers2,
1248                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1249                 break;
1250         case CHIP_PITCAIRN:
1251                 radeon_program_register_sequence(rdev,
1252                                                  pitcairn_golden_registers,
1253                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1254                 radeon_program_register_sequence(rdev,
1255                                                  pitcairn_golden_rlc_registers,
1256                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1257                 radeon_program_register_sequence(rdev,
1258                                                  pitcairn_mgcg_cgcg_init,
1259                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1260                 break;
1261         case CHIP_VERDE:
1262                 radeon_program_register_sequence(rdev,
1263                                                  verde_golden_registers,
1264                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
1265                 radeon_program_register_sequence(rdev,
1266                                                  verde_golden_rlc_registers,
1267                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1268                 radeon_program_register_sequence(rdev,
1269                                                  verde_mgcg_cgcg_init,
1270                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1271                 radeon_program_register_sequence(rdev,
1272                                                  verde_pg_init,
1273                                                  (const u32)ARRAY_SIZE(verde_pg_init));
1274                 break;
1275         case CHIP_OLAND:
1276                 radeon_program_register_sequence(rdev,
1277                                                  oland_golden_registers,
1278                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
1279                 radeon_program_register_sequence(rdev,
1280                                                  oland_golden_rlc_registers,
1281                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1282                 radeon_program_register_sequence(rdev,
1283                                                  oland_mgcg_cgcg_init,
1284                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1285                 break;
1286         case CHIP_HAINAN:
1287                 radeon_program_register_sequence(rdev,
1288                                                  hainan_golden_registers,
1289                                                  (const u32)ARRAY_SIZE(hainan_golden_registers));
1290                 radeon_program_register_sequence(rdev,
1291                                                  hainan_golden_registers2,
1292                                                  (const u32)ARRAY_SIZE(hainan_golden_registers2));
1293                 radeon_program_register_sequence(rdev,
1294                                                  hainan_mgcg_cgcg_init,
1295                                                  (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1296                 break;
1297         default:
1298                 break;
1299         }
1300 }
1301
1302 /**
1303  * si_get_allowed_info_register - fetch the register for the info ioctl
1304  *
1305  * @rdev: radeon_device pointer
1306  * @reg: register offset in bytes
1307  * @val: register value
1308  *
1309  * Returns 0 for success or -EINVAL for an invalid register
1310  *
1311  */
1312 int si_get_allowed_info_register(struct radeon_device *rdev,
1313                                  u32 reg, u32 *val)
1314 {
1315         switch (reg) {
1316         case GRBM_STATUS:
1317         case GRBM_STATUS2:
1318         case GRBM_STATUS_SE0:
1319         case GRBM_STATUS_SE1:
1320         case SRBM_STATUS:
1321         case SRBM_STATUS2:
1322         case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
1323         case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
1324         case UVD_STATUS:
1325                 *val = RREG32(reg);
1326                 return 0;
1327         default:
1328                 return -EINVAL;
1329         }
1330 }
1331
/*
 * Fixed clock constants.  TCLK is one tenth of PCIE_BUS_CLK and is the value
 * si_get_xclk() returns when MUX_TCLK_TO_XCLK is set.
 * NOTE(review): units are not stated here; presumably the same units as
 * rdev->clock.spll.reference_freq, since si_get_xclk() returns either one -
 * confirm.
 */
#define PCIE_BUS_CLK                10000
#define TCLK                        (PCIE_BUS_CLK / 10)
1334
1335 /**
1336  * si_get_xclk - get the xclk
1337  *
1338  * @rdev: radeon_device pointer
1339  *
1340  * Returns the reference clock used by the gfx engine
1341  * (SI).
1342  */
1343 u32 si_get_xclk(struct radeon_device *rdev)
1344 {
1345         u32 reference_clock = rdev->clock.spll.reference_freq;
1346         u32 tmp;
1347
1348         tmp = RREG32(CG_CLKPIN_CNTL_2);
1349         if (tmp & MUX_TCLK_TO_XCLK)
1350                 return TCLK;
1351
1352         tmp = RREG32(CG_CLKPIN_CNTL);
1353         if (tmp & XTALIN_DIVIDE)
1354                 return reference_clock / 4;
1355
1356         return reference_clock;
1357 }
1358
1359 /* get temperature in millidegrees */
1360 int si_get_temp(struct radeon_device *rdev)
1361 {
1362         u32 temp;
1363         int actual_temp = 0;
1364
1365         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1366                 CTF_TEMP_SHIFT;
1367
1368         if (temp & 0x200)
1369                 actual_temp = 255;
1370         else
1371                 actual_temp = temp & 0x1ff;
1372
1373         actual_temp = (actual_temp * 1000);
1374
1375         return actual_temp;
1376 }
1377
/*
 * Number of rows in each per-family *_io_mc_regs table; the same size is
 * used for every family, not only Tahiti.
 */
#define TAHITI_IO_MC_REGS_SIZE 36
1379
/*
 * IO MC register table for Tahiti: TAHITI_IO_MC_REGS_SIZE rows of two u32
 * words.  si_mc_load_microcode() selects it for CHIP_TAHITI when
 * rdev->new_fw is not set.
 * NOTE(review): each row is presumably an {index, data} pair - confirm
 * against the loop that consumes io_mc_regs in si_mc_load_microcode().
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
        {0x0000006f, 0x03044000},
        {0x00000070, 0x0480c018},
        {0x00000071, 0x00000040},
        {0x00000072, 0x01000000},
        {0x00000074, 0x000000ff},
        {0x00000075, 0x00143400},
        {0x00000076, 0x08ec0800},
        {0x00000077, 0x040000cc},
        {0x00000079, 0x00000000},
        {0x0000007a, 0x21000409},
        {0x0000007c, 0x00000000},
        {0x0000007d, 0xe8000000},
        {0x0000007e, 0x044408a8},
        {0x0000007f, 0x00000003},
        {0x00000080, 0x00000000},
        {0x00000081, 0x01000000},
        {0x00000082, 0x02000000},
        {0x00000083, 0x00000000},
        {0x00000084, 0xe3f3e4f4},
        {0x00000085, 0x00052024},
        {0x00000087, 0x00000000},
        {0x00000088, 0x66036603},
        {0x00000089, 0x01000000},
        {0x0000008b, 0x1c0a0000},
        {0x0000008c, 0xff010000},
        {0x0000008e, 0xffffefff},
        {0x0000008f, 0xfff3efff},
        {0x00000090, 0xfff3efbf},
        {0x00000094, 0x00101101},
        {0x00000095, 0x00000fff},
        {0x00000096, 0x00116fff},
        {0x00000097, 0x60010000},
        {0x00000098, 0x10010000},
        {0x00000099, 0x00006000},
        {0x0000009a, 0x00001000},
        {0x0000009f, 0x00a77400}
};
1418
/*
 * IO MC register table for Pitcairn: TAHITI_IO_MC_REGS_SIZE rows of two u32
 * words.  si_mc_load_microcode() selects it for CHIP_PITCAIRN when
 * rdev->new_fw is not set.
 * NOTE(review): each row is presumably an {index, data} pair - confirm
 * against the loop that consumes io_mc_regs in si_mc_load_microcode().
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
        {0x0000006f, 0x03044000},
        {0x00000070, 0x0480c018},
        {0x00000071, 0x00000040},
        {0x00000072, 0x01000000},
        {0x00000074, 0x000000ff},
        {0x00000075, 0x00143400},
        {0x00000076, 0x08ec0800},
        {0x00000077, 0x040000cc},
        {0x00000079, 0x00000000},
        {0x0000007a, 0x21000409},
        {0x0000007c, 0x00000000},
        {0x0000007d, 0xe8000000},
        {0x0000007e, 0x044408a8},
        {0x0000007f, 0x00000003},
        {0x00000080, 0x00000000},
        {0x00000081, 0x01000000},
        {0x00000082, 0x02000000},
        {0x00000083, 0x00000000},
        {0x00000084, 0xe3f3e4f4},
        {0x00000085, 0x00052024},
        {0x00000087, 0x00000000},
        {0x00000088, 0x66036603},
        {0x00000089, 0x01000000},
        {0x0000008b, 0x1c0a0000},
        {0x0000008c, 0xff010000},
        {0x0000008e, 0xffffefff},
        {0x0000008f, 0xfff3efff},
        {0x00000090, 0xfff3efbf},
        {0x00000094, 0x00101101},
        {0x00000095, 0x00000fff},
        {0x00000096, 0x00116fff},
        {0x00000097, 0x60010000},
        {0x00000098, 0x10010000},
        {0x00000099, 0x00006000},
        {0x0000009a, 0x00001000},
        {0x0000009f, 0x00a47400}
};
1457
/*
 * IO MC register table for Verde: TAHITI_IO_MC_REGS_SIZE rows of two u32
 * words.  When rdev->new_fw is not set, si_mc_load_microcode() selects it
 * for CHIP_VERDE and also as the default for any family without its own
 * case.
 * NOTE(review): each row is presumably an {index, data} pair - confirm
 * against the loop that consumes io_mc_regs in si_mc_load_microcode().
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
        {0x0000006f, 0x03044000},
        {0x00000070, 0x0480c018},
        {0x00000071, 0x00000040},
        {0x00000072, 0x01000000},
        {0x00000074, 0x000000ff},
        {0x00000075, 0x00143400},
        {0x00000076, 0x08ec0800},
        {0x00000077, 0x040000cc},
        {0x00000079, 0x00000000},
        {0x0000007a, 0x21000409},
        {0x0000007c, 0x00000000},
        {0x0000007d, 0xe8000000},
        {0x0000007e, 0x044408a8},
        {0x0000007f, 0x00000003},
        {0x00000080, 0x00000000},
        {0x00000081, 0x01000000},
        {0x00000082, 0x02000000},
        {0x00000083, 0x00000000},
        {0x00000084, 0xe3f3e4f4},
        {0x00000085, 0x00052024},
        {0x00000087, 0x00000000},
        {0x00000088, 0x66036603},
        {0x00000089, 0x01000000},
        {0x0000008b, 0x1c0a0000},
        {0x0000008c, 0xff010000},
        {0x0000008e, 0xffffefff},
        {0x0000008f, 0xfff3efff},
        {0x00000090, 0xfff3efbf},
        {0x00000094, 0x00101101},
        {0x00000095, 0x00000fff},
        {0x00000096, 0x00116fff},
        {0x00000097, 0x60010000},
        {0x00000098, 0x10010000},
        {0x00000099, 0x00006000},
        {0x0000009a, 0x00001000},
        {0x0000009f, 0x00a37400}
};
1496
/*
 * IO MC register table for Oland: TAHITI_IO_MC_REGS_SIZE rows of two u32
 * words.  si_mc_load_microcode() selects it for CHIP_OLAND when
 * rdev->new_fw is not set.
 * NOTE(review): each row is presumably an {index, data} pair - confirm
 * against the loop that consumes io_mc_regs in si_mc_load_microcode().
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
        {0x0000006f, 0x03044000},
        {0x00000070, 0x0480c018},
        {0x00000071, 0x00000040},
        {0x00000072, 0x01000000},
        {0x00000074, 0x000000ff},
        {0x00000075, 0x00143400},
        {0x00000076, 0x08ec0800},
        {0x00000077, 0x040000cc},
        {0x00000079, 0x00000000},
        {0x0000007a, 0x21000409},
        {0x0000007c, 0x00000000},
        {0x0000007d, 0xe8000000},
        {0x0000007e, 0x044408a8},
        {0x0000007f, 0x00000003},
        {0x00000080, 0x00000000},
        {0x00000081, 0x01000000},
        {0x00000082, 0x02000000},
        {0x00000083, 0x00000000},
        {0x00000084, 0xe3f3e4f4},
        {0x00000085, 0x00052024},
        {0x00000087, 0x00000000},
        {0x00000088, 0x66036603},
        {0x00000089, 0x01000000},
        {0x0000008b, 0x1c0a0000},
        {0x0000008c, 0xff010000},
        {0x0000008e, 0xffffefff},
        {0x0000008f, 0xfff3efff},
        {0x00000090, 0xfff3efbf},
        {0x00000094, 0x00101101},
        {0x00000095, 0x00000fff},
        {0x00000096, 0x00116fff},
        {0x00000097, 0x60010000},
        {0x00000098, 0x10010000},
        {0x00000099, 0x00006000},
        {0x0000009a, 0x00001000},
        {0x0000009f, 0x00a17730}
};
1535
/*
 * IO MC register table for Hainan: TAHITI_IO_MC_REGS_SIZE rows of two u32
 * words.  si_mc_load_microcode() selects it for CHIP_HAINAN when
 * rdev->new_fw is not set.
 * NOTE(review): each row is presumably an {index, data} pair - confirm
 * against the loop that consumes io_mc_regs in si_mc_load_microcode().
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
        {0x0000006f, 0x03044000},
        {0x00000070, 0x0480c018},
        {0x00000071, 0x00000040},
        {0x00000072, 0x01000000},
        {0x00000074, 0x000000ff},
        {0x00000075, 0x00143400},
        {0x00000076, 0x08ec0800},
        {0x00000077, 0x040000cc},
        {0x00000079, 0x00000000},
        {0x0000007a, 0x21000409},
        {0x0000007c, 0x00000000},
        {0x0000007d, 0xe8000000},
        {0x0000007e, 0x044408a8},
        {0x0000007f, 0x00000003},
        {0x00000080, 0x00000000},
        {0x00000081, 0x01000000},
        {0x00000082, 0x02000000},
        {0x00000083, 0x00000000},
        {0x00000084, 0xe3f3e4f4},
        {0x00000085, 0x00052024},
        {0x00000087, 0x00000000},
        {0x00000088, 0x66036603},
        {0x00000089, 0x01000000},
        {0x0000008b, 0x1c0a0000},
        {0x0000008c, 0xff010000},
        {0x0000008e, 0xffffefff},
        {0x0000008f, 0xfff3efff},
        {0x00000090, 0xfff3efbf},
        {0x00000094, 0x00101101},
        {0x00000095, 0x00000fff},
        {0x00000096, 0x00116fff},
        {0x00000097, 0x60010000},
        {0x00000098, 0x10010000},
        {0x00000099, 0x00006000},
        {0x0000009a, 0x00001000},
        {0x0000009f, 0x00a07730}
};
1574
1575 /* ucode loading */
1576 int si_mc_load_microcode(struct radeon_device *rdev)
1577 {
1578         const __be32 *fw_data = NULL;
1579         const __le32 *new_fw_data = NULL;
1580         u32 running;
1581         u32 *io_mc_regs = NULL;
1582         const __le32 *new_io_mc_regs = NULL;
1583         int i, regs_size, ucode_size;
1584
1585         if (!rdev->mc_fw)
1586                 return -EINVAL;
1587
1588         if (rdev->new_fw) {
1589                 const struct mc_firmware_header_v1_0 *hdr =
1590                         (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1591
1592                 radeon_ucode_print_mc_hdr(&hdr->header);
1593                 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1594                 new_io_mc_regs = (const __le32 *)
1595                         (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1596                 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1597                 new_fw_data = (const __le32 *)
1598                         (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1599         } else {
1600                 ucode_size = rdev->mc_fw->size / 4;
1601
1602                 switch (rdev->family) {
1603                 case CHIP_TAHITI:
1604                         io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1605                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1606                         break;
1607                 case CHIP_PITCAIRN:
1608                         io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1609                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1610                         break;
1611                 case CHIP_VERDE:
1612                 default:
1613                         io_mc_regs = (u32 *)&verde_io_mc_regs;
1614                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1615                         break;
1616                 case CHIP_OLAND:
1617                         io_mc_regs = (u32 *)&oland_io_mc_regs;
1618                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1619                         break;
1620                 case CHIP_HAINAN:
1621                         io_mc_regs = (u32 *)&hainan_io_mc_regs;
1622                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1623                         break;
1624                 }
1625                 fw_data = (const __be32 *)rdev->mc_fw->data;
1626         }
1627
1628         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1629
1630         if (running == 0) {
1631                 /* reset the engine and set to writable */
1632                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1633                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1634
1635                 /* load mc io regs */
1636                 for (i = 0; i < regs_size; i++) {
1637                         if (rdev->new_fw) {
1638                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1639                                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1640                         } else {
1641                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1642                                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1643                         }
1644                 }
1645                 /* load the MC ucode */
1646                 for (i = 0; i < ucode_size; i++) {
1647                         if (rdev->new_fw)
1648                                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1649                         else
1650                                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1651                 }
1652
1653                 /* put the engine back into the active state */
1654                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1655                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1656                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1657
1658                 /* wait for training to complete */
1659                 for (i = 0; i < rdev->usec_timeout; i++) {
1660                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1661                                 break;
1662                         udelay(1);
1663                 }
1664                 for (i = 0; i < rdev->usec_timeout; i++) {
1665                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1666                                 break;
1667                         udelay(1);
1668                 }
1669         }
1670
1671         return 0;
1672 }
1673
1674 static int si_init_microcode(struct radeon_device *rdev)
1675 {
1676         const char *chip_name;
1677         const char *new_chip_name;
1678         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1679         size_t smc_req_size, mc2_req_size;
1680         char fw_name[30];
1681         int err;
1682         int new_fw = 0;
1683         bool new_smc = false;
1684         bool si58_fw = false;
1685         bool banks2_fw = false;
1686
1687         DRM_DEBUG("\n");
1688
1689         switch (rdev->family) {
1690         case CHIP_TAHITI:
1691                 chip_name = "TAHITI";
1692                 new_chip_name = "tahiti";
1693                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1694                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1695                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1696                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1697                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1698                 mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1699                 smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1700                 break;
1701         case CHIP_PITCAIRN:
1702                 chip_name = "PITCAIRN";
1703                 if ((rdev->pdev->revision == 0x81) &&
1704                     ((rdev->pdev->device == 0x6810) ||
1705                      (rdev->pdev->device == 0x6811)))
1706                         new_smc = true;
1707                 new_chip_name = "pitcairn";
1708                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1709                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1710                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1711                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1712                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1713                 mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1714                 smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1715                 break;
1716         case CHIP_VERDE:
1717                 chip_name = "VERDE";
1718                 if (((rdev->pdev->device == 0x6820) &&
1719                      ((rdev->pdev->revision == 0x81) ||
1720                       (rdev->pdev->revision == 0x83))) ||
1721                     ((rdev->pdev->device == 0x6821) &&
1722                      ((rdev->pdev->revision == 0x83) ||
1723                       (rdev->pdev->revision == 0x87))) ||
1724                     ((rdev->pdev->revision == 0x87) &&
1725                      ((rdev->pdev->device == 0x6823) ||
1726                       (rdev->pdev->device == 0x682b))))
1727                         new_smc = true;
1728                 new_chip_name = "verde";
1729                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1730                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1731                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1732                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1733                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1734                 mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1735                 smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1736                 break;
1737         case CHIP_OLAND:
1738                 chip_name = "OLAND";
1739                 if (((rdev->pdev->revision == 0x81) &&
1740                      ((rdev->pdev->device == 0x6600) ||
1741                       (rdev->pdev->device == 0x6604) ||
1742                       (rdev->pdev->device == 0x6605) ||
1743                       (rdev->pdev->device == 0x6610))) ||
1744                     ((rdev->pdev->revision == 0x83) &&
1745                      (rdev->pdev->device == 0x6610)))
1746                         new_smc = true;
1747                 new_chip_name = "oland";
1748                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1749                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1750                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1751                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1752                 mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1753                 smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1754                 break;
1755         case CHIP_HAINAN:
1756                 chip_name = "HAINAN";
1757                 if (((rdev->pdev->revision == 0x81) &&
1758                      (rdev->pdev->device == 0x6660)) ||
1759                     ((rdev->pdev->revision == 0x83) &&
1760                      ((rdev->pdev->device == 0x6660) ||
1761                       (rdev->pdev->device == 0x6663) ||
1762                       (rdev->pdev->device == 0x6665) ||
1763                       (rdev->pdev->device == 0x6667))))
1764                         new_smc = true;
1765                 else if ((rdev->pdev->revision == 0xc3) &&
1766                          (rdev->pdev->device == 0x6665))
1767                         banks2_fw = true;
1768                 new_chip_name = "hainan";
1769                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1770                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1771                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1772                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1773                 mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1774                 smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1775                 break;
1776         default: BUG();
1777         }
1778
1779         /* this memory configuration requires special firmware */
1780         if (((RREG32(MC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58)
1781                 si58_fw = true;
1782
1783         DRM_INFO("Loading %s Microcode\n", new_chip_name);
1784
1785         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1786         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1787         if (err) {
1788                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1789                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1790                 if (err)
1791                         goto out;
1792                 if (rdev->pfp_fw->size != pfp_req_size) {
1793                         pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1794                                rdev->pfp_fw->size, fw_name);
1795                         err = -EINVAL;
1796                         goto out;
1797                 }
1798         } else {
1799                 err = radeon_ucode_validate(rdev->pfp_fw);
1800                 if (err) {
1801                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1802                                fw_name);
1803                         goto out;
1804                 } else {
1805                         new_fw++;
1806                 }
1807         }
1808
1809         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
1810         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1811         if (err) {
1812                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1813                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1814                 if (err)
1815                         goto out;
1816                 if (rdev->me_fw->size != me_req_size) {
1817                         pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1818                                rdev->me_fw->size, fw_name);
1819                         err = -EINVAL;
1820                 }
1821         } else {
1822                 err = radeon_ucode_validate(rdev->me_fw);
1823                 if (err) {
1824                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1825                                fw_name);
1826                         goto out;
1827                 } else {
1828                         new_fw++;
1829                 }
1830         }
1831
1832         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
1833         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1834         if (err) {
1835                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1836                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1837                 if (err)
1838                         goto out;
1839                 if (rdev->ce_fw->size != ce_req_size) {
1840                         pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1841                                rdev->ce_fw->size, fw_name);
1842                         err = -EINVAL;
1843                 }
1844         } else {
1845                 err = radeon_ucode_validate(rdev->ce_fw);
1846                 if (err) {
1847                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1848                                fw_name);
1849                         goto out;
1850                 } else {
1851                         new_fw++;
1852                 }
1853         }
1854
1855         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
1856         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1857         if (err) {
1858                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1859                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1860                 if (err)
1861                         goto out;
1862                 if (rdev->rlc_fw->size != rlc_req_size) {
1863                         pr_err("si_rlc: Bogus length %zu in firmware \"%s\"\n",
1864                                rdev->rlc_fw->size, fw_name);
1865                         err = -EINVAL;
1866                 }
1867         } else {
1868                 err = radeon_ucode_validate(rdev->rlc_fw);
1869                 if (err) {
1870                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1871                                fw_name);
1872                         goto out;
1873                 } else {
1874                         new_fw++;
1875                 }
1876         }
1877
1878         if (si58_fw)
1879                 snprintf(fw_name, sizeof(fw_name), "radeon/si58_mc.bin");
1880         else
1881                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
1882         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1883         if (err) {
1884                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1885                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1886                 if (err) {
1887                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1888                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1889                         if (err)
1890                                 goto out;
1891                 }
1892                 if ((rdev->mc_fw->size != mc_req_size) &&
1893                     (rdev->mc_fw->size != mc2_req_size)) {
1894                         pr_err("si_mc: Bogus length %zu in firmware \"%s\"\n",
1895                                rdev->mc_fw->size, fw_name);
1896                         err = -EINVAL;
1897                 }
1898                 DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1899         } else {
1900                 err = radeon_ucode_validate(rdev->mc_fw);
1901                 if (err) {
1902                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1903                                fw_name);
1904                         goto out;
1905                 } else {
1906                         new_fw++;
1907                 }
1908         }
1909
1910         if (banks2_fw)
1911                 snprintf(fw_name, sizeof(fw_name), "radeon/banks_k_2_smc.bin");
1912         else if (new_smc)
1913                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
1914         else
1915                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
1916         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1917         if (err) {
1918                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1919                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1920                 if (err) {
1921                         pr_err("smc: error loading firmware \"%s\"\n", fw_name);
1922                         release_firmware(rdev->smc_fw);
1923                         rdev->smc_fw = NULL;
1924                         err = 0;
1925                 } else if (rdev->smc_fw->size != smc_req_size) {
1926                         pr_err("si_smc: Bogus length %zu in firmware \"%s\"\n",
1927                                rdev->smc_fw->size, fw_name);
1928                         err = -EINVAL;
1929                 }
1930         } else {
1931                 err = radeon_ucode_validate(rdev->smc_fw);
1932                 if (err) {
1933                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1934                                fw_name);
1935                         goto out;
1936                 } else {
1937                         new_fw++;
1938                 }
1939         }
1940
1941         if (new_fw == 0) {
1942                 rdev->new_fw = false;
1943         } else if (new_fw < 6) {
1944                 pr_err("si_fw: mixing new and old firmware!\n");
1945                 err = -EINVAL;
1946         } else {
1947                 rdev->new_fw = true;
1948         }
1949 out:
1950         if (err) {
1951                 if (err != -EINVAL)
1952                         pr_err("si_cp: Failed to load firmware \"%s\"\n",
1953                                fw_name);
1954                 release_firmware(rdev->pfp_fw);
1955                 rdev->pfp_fw = NULL;
1956                 release_firmware(rdev->me_fw);
1957                 rdev->me_fw = NULL;
1958                 release_firmware(rdev->ce_fw);
1959                 rdev->ce_fw = NULL;
1960                 release_firmware(rdev->rlc_fw);
1961                 rdev->rlc_fw = NULL;
1962                 release_firmware(rdev->mc_fw);
1963                 rdev->mc_fw = NULL;
1964                 release_firmware(rdev->smc_fw);
1965                 rdev->smc_fw = NULL;
1966         }
1967         return err;
1968 }
1969
1970 /* watermark setup */
1971 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1972                                    struct radeon_crtc *radeon_crtc,
1973                                    struct drm_display_mode *mode,
1974                                    struct drm_display_mode *other_mode)
1975 {
1976         u32 tmp, buffer_alloc, i;
1977         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1978         /*
1979          * Line Buffer Setup
1980          * There are 3 line buffers, each one shared by 2 display controllers.
1981          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1982          * the display controllers.  The paritioning is done via one of four
1983          * preset allocations specified in bits 21:20:
1984          *  0 - half lb
1985          *  2 - whole lb, other crtc must be disabled
1986          */
1987         /* this can get tricky if we have two large displays on a paired group
1988          * of crtcs.  Ideally for multiple large displays we'd assign them to
1989          * non-linked crtcs for maximum line buffer allocation.
1990          */
1991         if (radeon_crtc->base.enabled && mode) {
1992                 if (other_mode) {
1993                         tmp = 0; /* 1/2 */
1994                         buffer_alloc = 1;
1995                 } else {
1996                         tmp = 2; /* whole */
1997                         buffer_alloc = 2;
1998                 }
1999         } else {
2000                 tmp = 0;
2001                 buffer_alloc = 0;
2002         }
2003
2004         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
2005                DC_LB_MEMORY_CONFIG(tmp));
2006
2007         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
2008                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
2009         for (i = 0; i < rdev->usec_timeout; i++) {
2010                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
2011                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
2012                         break;
2013                 udelay(1);
2014         }
2015
2016         if (radeon_crtc->base.enabled && mode) {
2017                 switch (tmp) {
2018                 case 0:
2019                 default:
2020                         return 4096 * 2;
2021                 case 2:
2022                         return 8192 * 2;
2023                 }
2024         }
2025
2026         /* controller not enabled, so no lb used */
2027         return 0;
2028 }
2029
2030 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
2031 {
2032         u32 tmp = RREG32(MC_SHARED_CHMAP);
2033
2034         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2035         case 0:
2036         default:
2037                 return 1;
2038         case 1:
2039                 return 2;
2040         case 2:
2041                 return 4;
2042         case 3:
2043                 return 8;
2044         case 4:
2045                 return 3;
2046         case 5:
2047                 return 6;
2048         case 6:
2049                 return 10;
2050         case 7:
2051                 return 12;
2052         case 8:
2053                 return 16;
2054         }
2055 }
2056
2057 struct dce6_wm_params {
2058         u32 dram_channels; /* number of dram channels */
2059         u32 yclk;          /* bandwidth per dram data pin in kHz */
2060         u32 sclk;          /* engine clock in kHz */
2061         u32 disp_clk;      /* display clock in kHz */
2062         u32 src_width;     /* viewport width */
2063         u32 active_time;   /* active display time in ns */
2064         u32 blank_time;    /* blank time in ns */
2065         bool interlaced;    /* mode is interlaced */
2066         fixed20_12 vsc;    /* vertical scale ratio */
2067         u32 num_heads;     /* number of active crtcs */
2068         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
2069         u32 lb_size;       /* line buffer allocated to pipe */
2070         u32 vtaps;         /* vertical scaler taps */
2071 };
2072
2073 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
2074 {
2075         /* Calculate raw DRAM Bandwidth */
2076         fixed20_12 dram_efficiency; /* 0.7 */
2077         fixed20_12 yclk, dram_channels, bandwidth;
2078         fixed20_12 a;
2079
2080         a.full = dfixed_const(1000);
2081         yclk.full = dfixed_const(wm->yclk);
2082         yclk.full = dfixed_div(yclk, a);
2083         dram_channels.full = dfixed_const(wm->dram_channels * 4);
2084         a.full = dfixed_const(10);
2085         dram_efficiency.full = dfixed_const(7);
2086         dram_efficiency.full = dfixed_div(dram_efficiency, a);
2087         bandwidth.full = dfixed_mul(dram_channels, yclk);
2088         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2089
2090         return dfixed_trunc(bandwidth);
2091 }
2092
2093 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2094 {
2095         /* Calculate DRAM Bandwidth and the part allocated to display. */
2096         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2097         fixed20_12 yclk, dram_channels, bandwidth;
2098         fixed20_12 a;
2099
2100         a.full = dfixed_const(1000);
2101         yclk.full = dfixed_const(wm->yclk);
2102         yclk.full = dfixed_div(yclk, a);
2103         dram_channels.full = dfixed_const(wm->dram_channels * 4);
2104         a.full = dfixed_const(10);
2105         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
2106         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2107         bandwidth.full = dfixed_mul(dram_channels, yclk);
2108         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2109
2110         return dfixed_trunc(bandwidth);
2111 }
2112
2113 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2114 {
2115         /* Calculate the display Data return Bandwidth */
2116         fixed20_12 return_efficiency; /* 0.8 */
2117         fixed20_12 sclk, bandwidth;
2118         fixed20_12 a;
2119
2120         a.full = dfixed_const(1000);
2121         sclk.full = dfixed_const(wm->sclk);
2122         sclk.full = dfixed_div(sclk, a);
2123         a.full = dfixed_const(10);
2124         return_efficiency.full = dfixed_const(8);
2125         return_efficiency.full = dfixed_div(return_efficiency, a);
2126         a.full = dfixed_const(32);
2127         bandwidth.full = dfixed_mul(a, sclk);
2128         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2129
2130         return dfixed_trunc(bandwidth);
2131 }
2132
2133 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2134 {
2135         return 32;
2136 }
2137
2138 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2139 {
2140         /* Calculate the DMIF Request Bandwidth */
2141         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2142         fixed20_12 disp_clk, sclk, bandwidth;
2143         fixed20_12 a, b1, b2;
2144         u32 min_bandwidth;
2145
2146         a.full = dfixed_const(1000);
2147         disp_clk.full = dfixed_const(wm->disp_clk);
2148         disp_clk.full = dfixed_div(disp_clk, a);
2149         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2150         b1.full = dfixed_mul(a, disp_clk);
2151
2152         a.full = dfixed_const(1000);
2153         sclk.full = dfixed_const(wm->sclk);
2154         sclk.full = dfixed_div(sclk, a);
2155         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2156         b2.full = dfixed_mul(a, sclk);
2157
2158         a.full = dfixed_const(10);
2159         disp_clk_request_efficiency.full = dfixed_const(8);
2160         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2161
2162         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2163
2164         a.full = dfixed_const(min_bandwidth);
2165         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2166
2167         return dfixed_trunc(bandwidth);
2168 }
2169
2170 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2171 {
2172         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
2173         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2174         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2175         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2176
2177         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2178 }
2179
2180 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2181 {
2182         /* Calculate the display mode Average Bandwidth
2183          * DisplayMode should contain the source and destination dimensions,
2184          * timing, etc.
2185          */
2186         fixed20_12 bpp;
2187         fixed20_12 line_time;
2188         fixed20_12 src_width;
2189         fixed20_12 bandwidth;
2190         fixed20_12 a;
2191
2192         a.full = dfixed_const(1000);
2193         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2194         line_time.full = dfixed_div(line_time, a);
2195         bpp.full = dfixed_const(wm->bytes_per_pixel);
2196         src_width.full = dfixed_const(wm->src_width);
2197         bandwidth.full = dfixed_mul(src_width, bpp);
2198         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2199         bandwidth.full = dfixed_div(bandwidth, line_time);
2200
2201         return dfixed_trunc(bandwidth);
2202 }
2203
2204 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2205 {
2206         /* First calcualte the latency in ns */
2207         u32 mc_latency = 2000; /* 2000 ns. */
2208         u32 available_bandwidth = dce6_available_bandwidth(wm);
2209         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2210         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2211         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2212         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2213                 (wm->num_heads * cursor_line_pair_return_time);
2214         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2215         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2216         u32 tmp, dmif_size = 12288;
2217         fixed20_12 a, b, c;
2218
2219         if (wm->num_heads == 0)
2220                 return 0;
2221
2222         a.full = dfixed_const(2);
2223         b.full = dfixed_const(1);
2224         if ((wm->vsc.full > a.full) ||
2225             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2226             (wm->vtaps >= 5) ||
2227             ((wm->vsc.full >= a.full) && wm->interlaced))
2228                 max_src_lines_per_dst_line = 4;
2229         else
2230                 max_src_lines_per_dst_line = 2;
2231
2232         a.full = dfixed_const(available_bandwidth);
2233         b.full = dfixed_const(wm->num_heads);
2234         a.full = dfixed_div(a, b);
2235         tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
2236         tmp = min(dfixed_trunc(a), tmp);
2237
2238         lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
2239
2240         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2241         b.full = dfixed_const(1000);
2242         c.full = dfixed_const(lb_fill_bw);
2243         b.full = dfixed_div(c, b);
2244         a.full = dfixed_div(a, b);
2245         line_fill_time = dfixed_trunc(a);
2246
2247         if (line_fill_time < wm->active_time)
2248                 return latency;
2249         else
2250                 return latency + (line_fill_time - wm->active_time);
2251
2252 }
2253
2254 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2255 {
2256         if (dce6_average_bandwidth(wm) <=
2257             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2258                 return true;
2259         else
2260                 return false;
2261 };
2262
2263 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2264 {
2265         if (dce6_average_bandwidth(wm) <=
2266             (dce6_available_bandwidth(wm) / wm->num_heads))
2267                 return true;
2268         else
2269                 return false;
2270 };
2271
2272 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2273 {
2274         u32 lb_partitions = wm->lb_size / wm->src_width;
2275         u32 line_time = wm->active_time + wm->blank_time;
2276         u32 latency_tolerant_lines;
2277         u32 latency_hiding;
2278         fixed20_12 a;
2279
2280         a.full = dfixed_const(1);
2281         if (wm->vsc.full > a.full)
2282                 latency_tolerant_lines = 1;
2283         else {
2284                 if (lb_partitions <= (wm->vtaps + 1))
2285                         latency_tolerant_lines = 1;
2286                 else
2287                         latency_tolerant_lines = 2;
2288         }
2289
2290         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2291
2292         if (dce6_latency_watermark(wm) <= latency_hiding)
2293                 return true;
2294         else
2295                 return false;
2296 }
2297
2298 static void dce6_program_watermarks(struct radeon_device *rdev,
2299                                          struct radeon_crtc *radeon_crtc,
2300                                          u32 lb_size, u32 num_heads)
2301 {
2302         struct drm_display_mode *mode = &radeon_crtc->base.mode;
2303         struct dce6_wm_params wm_low, wm_high;
2304         u32 dram_channels;
2305         u32 active_time;
2306         u32 line_time = 0;
2307         u32 latency_watermark_a = 0, latency_watermark_b = 0;
2308         u32 priority_a_mark = 0, priority_b_mark = 0;
2309         u32 priority_a_cnt = PRIORITY_OFF;
2310         u32 priority_b_cnt = PRIORITY_OFF;
2311         u32 tmp, arb_control3;
2312         fixed20_12 a, b, c;
2313
2314         if (radeon_crtc->base.enabled && num_heads && mode) {
2315                 active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
2316                                             (u32)mode->clock);
2317                 line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
2318                                           (u32)mode->clock);
2319                 line_time = min(line_time, (u32)65535);
2320                 priority_a_cnt = 0;
2321                 priority_b_cnt = 0;
2322
2323                 if (rdev->family == CHIP_ARUBA)
2324                         dram_channels = evergreen_get_number_of_dram_channels(rdev);
2325                 else
2326                         dram_channels = si_get_number_of_dram_channels(rdev);
2327
2328                 /* watermark for high clocks */
2329                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2330                         wm_high.yclk =
2331                                 radeon_dpm_get_mclk(rdev, false) * 10;
2332                         wm_high.sclk =
2333                                 radeon_dpm_get_sclk(rdev, false) * 10;
2334                 } else {
2335                         wm_high.yclk = rdev->pm.current_mclk * 10;
2336                         wm_high.sclk = rdev->pm.current_sclk * 10;
2337                 }
2338
2339                 wm_high.disp_clk = mode->clock;
2340                 wm_high.src_width = mode->crtc_hdisplay;
2341                 wm_high.active_time = active_time;
2342                 wm_high.blank_time = line_time - wm_high.active_time;
2343                 wm_high.interlaced = false;
2344                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2345                         wm_high.interlaced = true;
2346                 wm_high.vsc = radeon_crtc->vsc;
2347                 wm_high.vtaps = 1;
2348                 if (radeon_crtc->rmx_type != RMX_OFF)
2349                         wm_high.vtaps = 2;
2350                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2351                 wm_high.lb_size = lb_size;
2352                 wm_high.dram_channels = dram_channels;
2353                 wm_high.num_heads = num_heads;
2354
2355                 /* watermark for low clocks */
2356                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2357                         wm_low.yclk =
2358                                 radeon_dpm_get_mclk(rdev, true) * 10;
2359                         wm_low.sclk =
2360                                 radeon_dpm_get_sclk(rdev, true) * 10;
2361                 } else {
2362                         wm_low.yclk = rdev->pm.current_mclk * 10;
2363                         wm_low.sclk = rdev->pm.current_sclk * 10;
2364                 }
2365
2366                 wm_low.disp_clk = mode->clock;
2367                 wm_low.src_width = mode->crtc_hdisplay;
2368                 wm_low.active_time = active_time;
2369                 wm_low.blank_time = line_time - wm_low.active_time;
2370                 wm_low.interlaced = false;
2371                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2372                         wm_low.interlaced = true;
2373                 wm_low.vsc = radeon_crtc->vsc;
2374                 wm_low.vtaps = 1;
2375                 if (radeon_crtc->rmx_type != RMX_OFF)
2376                         wm_low.vtaps = 2;
2377                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2378                 wm_low.lb_size = lb_size;
2379                 wm_low.dram_channels = dram_channels;
2380                 wm_low.num_heads = num_heads;
2381
2382                 /* set for high clocks */
2383                 latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2384                 /* set for low clocks */
2385                 latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2386
2387                 /* possibly force display priority to high */
2388                 /* should really do this at mode validation time... */
2389                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2390                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2391                     !dce6_check_latency_hiding(&wm_high) ||
2392                     (rdev->disp_priority == 2)) {
2393                         DRM_DEBUG_KMS("force priority to high\n");
2394                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2395                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2396                 }
2397                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2398                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2399                     !dce6_check_latency_hiding(&wm_low) ||
2400                     (rdev->disp_priority == 2)) {
2401                         DRM_DEBUG_KMS("force priority to high\n");
2402                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2403                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2404                 }
2405
2406                 a.full = dfixed_const(1000);
2407                 b.full = dfixed_const(mode->clock);
2408                 b.full = dfixed_div(b, a);
2409                 c.full = dfixed_const(latency_watermark_a);
2410                 c.full = dfixed_mul(c, b);
2411                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2412                 c.full = dfixed_div(c, a);
2413                 a.full = dfixed_const(16);
2414                 c.full = dfixed_div(c, a);
2415                 priority_a_mark = dfixed_trunc(c);
2416                 priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2417
2418                 a.full = dfixed_const(1000);
2419                 b.full = dfixed_const(mode->clock);
2420                 b.full = dfixed_div(b, a);
2421                 c.full = dfixed_const(latency_watermark_b);
2422                 c.full = dfixed_mul(c, b);
2423                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2424                 c.full = dfixed_div(c, a);
2425                 a.full = dfixed_const(16);
2426                 c.full = dfixed_div(c, a);
2427                 priority_b_mark = dfixed_trunc(c);
2428                 priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2429
2430                 /* Save number of lines the linebuffer leads before the scanout */
2431                 radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
2432         }
2433
2434         /* select wm A */
2435         arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2436         tmp = arb_control3;
2437         tmp &= ~LATENCY_WATERMARK_MASK(3);
2438         tmp |= LATENCY_WATERMARK_MASK(1);
2439         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2440         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2441                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2442                 LATENCY_HIGH_WATERMARK(line_time)));
2443         /* select wm B */
2444         tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2445         tmp &= ~LATENCY_WATERMARK_MASK(3);
2446         tmp |= LATENCY_WATERMARK_MASK(2);
2447         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2448         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2449                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2450                 LATENCY_HIGH_WATERMARK(line_time)));
2451         /* restore original selection */
2452         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2453
2454         /* write the priority marks */
2455         WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2456         WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2457
2458         /* save values for DPM */
2459         radeon_crtc->line_time = line_time;
2460         radeon_crtc->wm_high = latency_watermark_a;
2461         radeon_crtc->wm_low = latency_watermark_b;
2462 }
2463
2464 void dce6_bandwidth_update(struct radeon_device *rdev)
2465 {
2466         struct drm_display_mode *mode0 = NULL;
2467         struct drm_display_mode *mode1 = NULL;
2468         u32 num_heads = 0, lb_size;
2469         int i;
2470
2471         if (!rdev->mode_info.mode_config_initialized)
2472                 return;
2473
2474         radeon_update_display_priority(rdev);
2475
2476         for (i = 0; i < rdev->num_crtc; i++) {
2477                 if (rdev->mode_info.crtcs[i]->base.enabled)
2478                         num_heads++;
2479         }
2480         for (i = 0; i < rdev->num_crtc; i += 2) {
2481                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2482                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2483                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2484                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2485                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2486                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2487         }
2488 }
2489
2490 /*
2491  * Core functions
2492  */
2493 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2494 {
2495         u32 *tile = rdev->config.si.tile_mode_array;
2496         const u32 num_tile_mode_states =
2497                         ARRAY_SIZE(rdev->config.si.tile_mode_array);
2498         u32 reg_offset, split_equal_to_row_size;
2499
2500         switch (rdev->config.si.mem_row_size_in_kb) {
2501         case 1:
2502                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2503                 break;
2504         case 2:
2505         default:
2506                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2507                 break;
2508         case 4:
2509                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2510                 break;
2511         }
2512
2513         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2514                 tile[reg_offset] = 0;
2515
2516         switch(rdev->family) {
2517         case CHIP_TAHITI:
2518         case CHIP_PITCAIRN:
2519                 /* non-AA compressed depth or any compressed stencil */
2520                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2521                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2522                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2523                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2524                            NUM_BANKS(ADDR_SURF_16_BANK) |
2525                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2526                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2527                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2528                 /* 2xAA/4xAA compressed depth only */
2529                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2530                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2531                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2532                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2533                            NUM_BANKS(ADDR_SURF_16_BANK) |
2534                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2535                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2536                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2537                 /* 8xAA compressed depth only */
2538                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2539                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2540                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2541                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2542                            NUM_BANKS(ADDR_SURF_16_BANK) |
2543                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2544                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2545                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2546                 /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2547                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2548                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2549                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2550                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2551                            NUM_BANKS(ADDR_SURF_16_BANK) |
2552                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2553                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2554                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2555                 /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2556                 tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2557                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2558                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2559                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2560                            NUM_BANKS(ADDR_SURF_16_BANK) |
2561                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2562                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2563                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2564                 /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2565                 tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2566                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2567                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2568                            TILE_SPLIT(split_equal_to_row_size) |
2569                            NUM_BANKS(ADDR_SURF_16_BANK) |
2570                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2571                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2572                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2573                 /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2574                 tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2575                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2576                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2577                            TILE_SPLIT(split_equal_to_row_size) |
2578                            NUM_BANKS(ADDR_SURF_16_BANK) |
2579                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2580                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2581                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2582                 /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2583                 tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2584                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2585                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2586                            TILE_SPLIT(split_equal_to_row_size) |
2587                            NUM_BANKS(ADDR_SURF_16_BANK) |
2588                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2589                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2590                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2591                 /* 1D and 1D Array Surfaces */
2592                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2593                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2594                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2595                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2596                            NUM_BANKS(ADDR_SURF_16_BANK) |
2597                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2598                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2599                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2600                 /* Displayable maps. */
2601                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2602                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2603                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2604                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2605                            NUM_BANKS(ADDR_SURF_16_BANK) |
2606                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2607                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2608                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2609                 /* Display 8bpp. */
2610                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2611                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2612                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2613                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2614                            NUM_BANKS(ADDR_SURF_16_BANK) |
2615                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2617                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2618                 /* Display 16bpp. */
2619                 tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2620                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2621                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2622                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2623                            NUM_BANKS(ADDR_SURF_16_BANK) |
2624                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2626                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2627                 /* Display 32bpp. */
2628                 tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2629                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2630                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2631                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2632                            NUM_BANKS(ADDR_SURF_16_BANK) |
2633                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2634                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2635                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2636                 /* Thin. */
2637                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2638                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2639                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2640                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2641                            NUM_BANKS(ADDR_SURF_16_BANK) |
2642                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2644                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2645                 /* Thin 8 bpp. */
2646                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2647                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2648                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2649                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2650                            NUM_BANKS(ADDR_SURF_16_BANK) |
2651                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2653                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2654                 /* Thin 16 bpp. */
2655                 tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2656                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2657                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2658                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2659                            NUM_BANKS(ADDR_SURF_16_BANK) |
2660                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2661                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2662                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2663                 /* Thin 32 bpp. */
2664                 tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2665                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2666                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2667                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2668                            NUM_BANKS(ADDR_SURF_16_BANK) |
2669                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2670                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2671                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2672                 /* Thin 64 bpp. */
2673                 tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2675                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2676                            TILE_SPLIT(split_equal_to_row_size) |
2677                            NUM_BANKS(ADDR_SURF_16_BANK) |
2678                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2679                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2680                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2681                 /* 8 bpp PRT. */
2682                 tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2683                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2684                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2685                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2686                            NUM_BANKS(ADDR_SURF_16_BANK) |
2687                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2688                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2689                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2690                 /* 16 bpp PRT */
2691                 tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2692                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2693                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2694                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2695                            NUM_BANKS(ADDR_SURF_16_BANK) |
2696                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2698                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2699                 /* 32 bpp PRT */
2700                 tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2701                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2702                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2703                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2704                            NUM_BANKS(ADDR_SURF_16_BANK) |
2705                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2706                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2707                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2708                 /* 64 bpp PRT */
2709                 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2710                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2711                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2712                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2713                            NUM_BANKS(ADDR_SURF_16_BANK) |
2714                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2715                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2716                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2717                 /* 128 bpp PRT */
2718                 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2719                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2720                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2721                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2722                            NUM_BANKS(ADDR_SURF_8_BANK) |
2723                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2724                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2725                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2726
2727                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2728                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2729                 break;
2730
2731         case CHIP_VERDE:
2732         case CHIP_OLAND:
2733         case CHIP_HAINAN:
2734                 /* non-AA compressed depth or any compressed stencil */
2735                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2736                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2737                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2738                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2739                            NUM_BANKS(ADDR_SURF_16_BANK) |
2740                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2741                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2742                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2743                 /* 2xAA/4xAA compressed depth only */
2744                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2745                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2746                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2747                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2748                            NUM_BANKS(ADDR_SURF_16_BANK) |
2749                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2750                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2751                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2752                 /* 8xAA compressed depth only */
2753                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2754                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2755                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2756                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2757                            NUM_BANKS(ADDR_SURF_16_BANK) |
2758                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2759                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2760                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2761                 /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2762                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2763                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2764                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2765                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2766                            NUM_BANKS(ADDR_SURF_16_BANK) |
2767                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2769                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2770                 /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2771                 tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2772                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2773                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2774                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2775                            NUM_BANKS(ADDR_SURF_16_BANK) |
2776                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2777                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2778                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2779                 /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2780                 tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2781                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2782                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2783                            TILE_SPLIT(split_equal_to_row_size) |
2784                            NUM_BANKS(ADDR_SURF_16_BANK) |
2785                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2786                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2787                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2788                 /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2789                 tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2790                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2791                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2792                            TILE_SPLIT(split_equal_to_row_size) |
2793                            NUM_BANKS(ADDR_SURF_16_BANK) |
2794                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2795                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2796                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2797                 /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2798                 tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2799                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2800                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2801                            TILE_SPLIT(split_equal_to_row_size) |
2802                            NUM_BANKS(ADDR_SURF_16_BANK) |
2803                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2804                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2805                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2806                 /* 1D and 1D Array Surfaces */
2807                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2808                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2809                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2811                            NUM_BANKS(ADDR_SURF_16_BANK) |
2812                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2813                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2814                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2815                 /* Displayable maps. */
2816                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2817                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2818                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2819                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2820                            NUM_BANKS(ADDR_SURF_16_BANK) |
2821                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2823                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2824                 /* Display 8bpp. */
2825                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2826                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2827                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2828                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2829                            NUM_BANKS(ADDR_SURF_16_BANK) |
2830                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2831                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2832                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2833                 /* Display 16bpp. */
2834                 tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2835                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2836                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2837                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2838                            NUM_BANKS(ADDR_SURF_16_BANK) |
2839                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2840                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2841                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2842                 /* Display 32bpp. */
2843                 tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2844                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2845                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2846                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2847                            NUM_BANKS(ADDR_SURF_16_BANK) |
2848                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2851                 /* Thin. */
2852                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2853                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2854                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2855                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2856                            NUM_BANKS(ADDR_SURF_16_BANK) |
2857                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2859                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2860                 /* Thin 8 bpp. */
2861                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2862                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2863                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2864                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2865                            NUM_BANKS(ADDR_SURF_16_BANK) |
2866                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2867                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2868                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2869                 /* Thin 16 bpp. */
2870                 tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2871                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2872                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2873                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2874                            NUM_BANKS(ADDR_SURF_16_BANK) |
2875                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2876                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2877                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2878                 /* Thin 32 bpp. */
2879                 tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2880                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2881                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2882                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2883                            NUM_BANKS(ADDR_SURF_16_BANK) |
2884                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2885                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2886                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2887                 /* Thin 64 bpp. */
2888                 tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2889                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2890                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2891                            TILE_SPLIT(split_equal_to_row_size) |
2892                            NUM_BANKS(ADDR_SURF_16_BANK) |
2893                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2894                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2895                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2896                 /* 8 bpp PRT. */
2897                 tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2898                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2899                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2900                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2901                            NUM_BANKS(ADDR_SURF_16_BANK) |
2902                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2903                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2904                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2905                 /* 16 bpp PRT */
2906                 tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2907                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2908                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2909                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2910                            NUM_BANKS(ADDR_SURF_16_BANK) |
2911                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2912                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2913                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2914                 /* 32 bpp PRT */
2915                 tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2916                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2917                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2918                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2919                            NUM_BANKS(ADDR_SURF_16_BANK) |
2920                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2921                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2922                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2923                 /* 64 bpp PRT */
2924                 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2925                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2926                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2927                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2928                            NUM_BANKS(ADDR_SURF_16_BANK) |
2929                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2930                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2931                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2932                 /* 128 bpp PRT */
2933                 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2934                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2935                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2936                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2937                            NUM_BANKS(ADDR_SURF_8_BANK) |
2938                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2939                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2940                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2941
2942                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2943                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2944                 break;
2945
2946         default:
2947                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2948         }
2949 }
2950
2951 static void si_select_se_sh(struct radeon_device *rdev,
2952                             u32 se_num, u32 sh_num)
2953 {
2954         u32 data = INSTANCE_BROADCAST_WRITES;
2955
2956         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2957                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2958         else if (se_num == 0xffffffff)
2959                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2960         else if (sh_num == 0xffffffff)
2961                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2962         else
2963                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2964         WREG32(GRBM_GFX_INDEX, data);
2965 }
2966
/**
 * si_create_bitmask - build a mask with the low @bit_width bits set
 * @bit_width: number of low-order bits to set
 *
 * Returns a u32 with bits [@bit_width - 1:0] set: 0 for a width of 0 and
 * 0xffffffff for any width of 32 or more.
 */
static u32 si_create_bitmask(u32 bit_width)
{
        /* shifting a 32-bit value by 32 or more is undefined, so saturate */
        if (bit_width >= 32)
                return 0xffffffff;

        return ((u32)1 << bit_width) - 1;
}
2977
2978 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2979 {
2980         u32 data, mask;
2981
2982         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2983         if (data & 1)
2984                 data &= INACTIVE_CUS_MASK;
2985         else
2986                 data = 0;
2987         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2988
2989         data >>= INACTIVE_CUS_SHIFT;
2990
2991         mask = si_create_bitmask(cu_per_sh);
2992
2993         return ~data & mask;
2994 }
2995
2996 static void si_setup_spi(struct radeon_device *rdev,
2997                          u32 se_num, u32 sh_per_se,
2998                          u32 cu_per_sh)
2999 {
3000         int i, j, k;
3001         u32 data, mask, active_cu;
3002
3003         for (i = 0; i < se_num; i++) {
3004                 for (j = 0; j < sh_per_se; j++) {
3005                         si_select_se_sh(rdev, i, j);
3006                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
3007                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
3008
3009                         mask = 1;
3010                         for (k = 0; k < 16; k++) {
3011                                 mask <<= k;
3012                                 if (active_cu & mask) {
3013                                         data &= ~mask;
3014                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
3015                                         break;
3016                                 }
3017                         }
3018                 }
3019         }
3020         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3021 }
3022
3023 static u32 si_get_rb_disabled(struct radeon_device *rdev,
3024                               u32 max_rb_num_per_se,
3025                               u32 sh_per_se)
3026 {
3027         u32 data, mask;
3028
3029         data = RREG32(CC_RB_BACKEND_DISABLE);
3030         if (data & 1)
3031                 data &= BACKEND_DISABLE_MASK;
3032         else
3033                 data = 0;
3034         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3035
3036         data >>= BACKEND_DISABLE_SHIFT;
3037
3038         mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3039
3040         return data & mask;
3041 }
3042
3043 static void si_setup_rb(struct radeon_device *rdev,
3044                         u32 se_num, u32 sh_per_se,
3045                         u32 max_rb_num_per_se)
3046 {
3047         int i, j;
3048         u32 data, mask;
3049         u32 disabled_rbs = 0;
3050         u32 enabled_rbs = 0;
3051
3052         for (i = 0; i < se_num; i++) {
3053                 for (j = 0; j < sh_per_se; j++) {
3054                         si_select_se_sh(rdev, i, j);
3055                         data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3056                         disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
3057                 }
3058         }
3059         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3060
3061         mask = 1;
3062         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3063                 if (!(disabled_rbs & mask))
3064                         enabled_rbs |= mask;
3065                 mask <<= 1;
3066         }
3067
3068         rdev->config.si.backend_enable_mask = enabled_rbs;
3069
3070         for (i = 0; i < se_num; i++) {
3071                 si_select_se_sh(rdev, i, 0xffffffff);
3072                 data = 0;
3073                 for (j = 0; j < sh_per_se; j++) {
3074                         switch (enabled_rbs & 3) {
3075                         case 1:
3076                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3077                                 break;
3078                         case 2:
3079                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3080                                 break;
3081                         case 3:
3082                         default:
3083                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3084                                 break;
3085                         }
3086                         enabled_rbs >>= 2;
3087                 }
3088                 WREG32(PA_SC_RASTER_CONFIG, data);
3089         }
3090         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3091 }
3092
3093 static void si_gpu_init(struct radeon_device *rdev)
3094 {
3095         u32 gb_addr_config = 0;
3096         u32 mc_shared_chmap, mc_arb_ramcfg;
3097         u32 sx_debug_1;
3098         u32 hdp_host_path_cntl;
3099         u32 tmp;
3100         int i, j;
3101
3102         switch (rdev->family) {
3103         case CHIP_TAHITI:
3104                 rdev->config.si.max_shader_engines = 2;
3105                 rdev->config.si.max_tile_pipes = 12;
3106                 rdev->config.si.max_cu_per_sh = 8;
3107                 rdev->config.si.max_sh_per_se = 2;
3108                 rdev->config.si.max_backends_per_se = 4;
3109                 rdev->config.si.max_texture_channel_caches = 12;
3110                 rdev->config.si.max_gprs = 256;
3111                 rdev->config.si.max_gs_threads = 32;
3112                 rdev->config.si.max_hw_contexts = 8;
3113
3114                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3115                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3116                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3117                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3118                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3119                 break;
3120         case CHIP_PITCAIRN:
3121                 rdev->config.si.max_shader_engines = 2;
3122                 rdev->config.si.max_tile_pipes = 8;
3123                 rdev->config.si.max_cu_per_sh = 5;
3124                 rdev->config.si.max_sh_per_se = 2;
3125                 rdev->config.si.max_backends_per_se = 4;
3126                 rdev->config.si.max_texture_channel_caches = 8;
3127                 rdev->config.si.max_gprs = 256;
3128                 rdev->config.si.max_gs_threads = 32;
3129                 rdev->config.si.max_hw_contexts = 8;
3130
3131                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3132                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3133                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3134                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3135                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3136                 break;
3137         case CHIP_VERDE:
3138         default:
3139                 rdev->config.si.max_shader_engines = 1;
3140                 rdev->config.si.max_tile_pipes = 4;
3141                 rdev->config.si.max_cu_per_sh = 5;
3142                 rdev->config.si.max_sh_per_se = 2;
3143                 rdev->config.si.max_backends_per_se = 4;
3144                 rdev->config.si.max_texture_channel_caches = 4;
3145                 rdev->config.si.max_gprs = 256;
3146                 rdev->config.si.max_gs_threads = 32;
3147                 rdev->config.si.max_hw_contexts = 8;
3148
3149                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3150                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3151                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3152                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3153                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3154                 break;
3155         case CHIP_OLAND:
3156                 rdev->config.si.max_shader_engines = 1;
3157                 rdev->config.si.max_tile_pipes = 4;
3158                 rdev->config.si.max_cu_per_sh = 6;
3159                 rdev->config.si.max_sh_per_se = 1;
3160                 rdev->config.si.max_backends_per_se = 2;
3161                 rdev->config.si.max_texture_channel_caches = 4;
3162                 rdev->config.si.max_gprs = 256;
3163                 rdev->config.si.max_gs_threads = 16;
3164                 rdev->config.si.max_hw_contexts = 8;
3165
3166                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3167                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3168                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3169                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3170                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3171                 break;
3172         case CHIP_HAINAN:
3173                 rdev->config.si.max_shader_engines = 1;
3174                 rdev->config.si.max_tile_pipes = 4;
3175                 rdev->config.si.max_cu_per_sh = 5;
3176                 rdev->config.si.max_sh_per_se = 1;
3177                 rdev->config.si.max_backends_per_se = 1;
3178                 rdev->config.si.max_texture_channel_caches = 2;
3179                 rdev->config.si.max_gprs = 256;
3180                 rdev->config.si.max_gs_threads = 16;
3181                 rdev->config.si.max_hw_contexts = 8;
3182
3183                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3184                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3185                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3186                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3187                 gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
3188                 break;
3189         }
3190
3191         /* Initialize HDP */
3192         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3193                 WREG32((0x2c14 + j), 0x00000000);
3194                 WREG32((0x2c18 + j), 0x00000000);
3195                 WREG32((0x2c1c + j), 0x00000000);
3196                 WREG32((0x2c20 + j), 0x00000000);
3197                 WREG32((0x2c24 + j), 0x00000000);
3198         }
3199
3200         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3201         WREG32(SRBM_INT_CNTL, 1);
3202         WREG32(SRBM_INT_ACK, 1);
3203
3204         evergreen_fix_pci_max_read_req_size(rdev);
3205
3206         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3207
3208         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3209         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3210
3211         rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3212         rdev->config.si.mem_max_burst_length_bytes = 256;
3213         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3214         rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3215         if (rdev->config.si.mem_row_size_in_kb > 4)
3216                 rdev->config.si.mem_row_size_in_kb = 4;
3217         /* XXX use MC settings? */
3218         rdev->config.si.shader_engine_tile_size = 32;
3219         rdev->config.si.num_gpus = 1;
3220         rdev->config.si.multi_gpu_tile_size = 64;
3221
3222         /* fix up row size */
3223         gb_addr_config &= ~ROW_SIZE_MASK;
3224         switch (rdev->config.si.mem_row_size_in_kb) {
3225         case 1:
3226         default:
3227                 gb_addr_config |= ROW_SIZE(0);
3228                 break;
3229         case 2:
3230                 gb_addr_config |= ROW_SIZE(1);
3231                 break;
3232         case 4:
3233                 gb_addr_config |= ROW_SIZE(2);
3234                 break;
3235         }
3236
3237         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3238          * not have bank info, so create a custom tiling dword.
3239          * bits 3:0   num_pipes
3240          * bits 7:4   num_banks
3241          * bits 11:8  group_size
3242          * bits 15:12 row_size
3243          */
3244         rdev->config.si.tile_config = 0;
3245         switch (rdev->config.si.num_tile_pipes) {
3246         case 1:
3247                 rdev->config.si.tile_config |= (0 << 0);
3248                 break;
3249         case 2:
3250                 rdev->config.si.tile_config |= (1 << 0);
3251                 break;
3252         case 4:
3253                 rdev->config.si.tile_config |= (2 << 0);
3254                 break;
3255         case 8:
3256         default:
3257                 /* XXX what about 12? */
3258                 rdev->config.si.tile_config |= (3 << 0);
3259                 break;
3260         }
3261         switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3262         case 0: /* four banks */
3263                 rdev->config.si.tile_config |= 0 << 4;
3264                 break;
3265         case 1: /* eight banks */
3266                 rdev->config.si.tile_config |= 1 << 4;
3267                 break;
3268         case 2: /* sixteen banks */
3269         default:
3270                 rdev->config.si.tile_config |= 2 << 4;
3271                 break;
3272         }
3273         rdev->config.si.tile_config |=
3274                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3275         rdev->config.si.tile_config |=
3276                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3277
3278         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3279         WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3280         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3281         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3282         WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3283         WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3284         if (rdev->has_uvd) {
3285                 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3286                 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3287                 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3288         }
3289
3290         si_tiling_mode_table_init(rdev);
3291
3292         si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3293                     rdev->config.si.max_sh_per_se,
3294                     rdev->config.si.max_backends_per_se);
3295
3296         si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3297                      rdev->config.si.max_sh_per_se,
3298                      rdev->config.si.max_cu_per_sh);
3299
3300         rdev->config.si.active_cus = 0;
3301         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
3302                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
3303                         rdev->config.si.active_cus +=
3304                                 hweight32(si_get_cu_active_bitmap(rdev, i, j));
3305                 }
3306         }
3307
3308         /* set HW defaults for 3D engine */
3309         WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3310                                      ROQ_IB2_START(0x2b)));
3311         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3312
3313         sx_debug_1 = RREG32(SX_DEBUG_1);
3314         WREG32(SX_DEBUG_1, sx_debug_1);
3315
3316         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3317
3318         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3319                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3320                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3321                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3322
3323         WREG32(VGT_NUM_INSTANCES, 1);
3324
3325         WREG32(CP_PERFMON_CNTL, 0);
3326
3327         WREG32(SQ_CONFIG, 0);
3328
3329         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3330                                           FORCE_EOV_MAX_REZ_CNT(255)));
3331
3332         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3333                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3334
3335         WREG32(VGT_GS_VERTEX_REUSE, 16);
3336         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3337
3338         WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3339         WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3340         WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3341         WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3342         WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3343         WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3344         WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3345         WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3346
3347         tmp = RREG32(HDP_MISC_CNTL);
3348         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3349         WREG32(HDP_MISC_CNTL, tmp);
3350
3351         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3352         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3353
3354         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3355
3356         udelay(50);
3357 }
3358
3359 /*
3360  * GPU scratch registers helpers function.
3361  */
3362 static void si_scratch_init(struct radeon_device *rdev)
3363 {
3364         int i;
3365
3366         rdev->scratch.num_reg = 7;
3367         rdev->scratch.reg_base = SCRATCH_REG0;
3368         for (i = 0; i < rdev->scratch.num_reg; i++) {
3369                 rdev->scratch.free[i] = true;
3370                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3371         }
3372 }
3373
3374 void si_fence_ring_emit(struct radeon_device *rdev,
3375                         struct radeon_fence *fence)
3376 {
3377         struct radeon_ring *ring = &rdev->ring[fence->ring];
3378         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3379
3380         /* flush read cache over gart */
3381         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3382         radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3383         radeon_ring_write(ring, 0);
3384         radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3385         radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3386                           PACKET3_TC_ACTION_ENA |
3387                           PACKET3_SH_KCACHE_ACTION_ENA |
3388                           PACKET3_SH_ICACHE_ACTION_ENA);
3389         radeon_ring_write(ring, 0xFFFFFFFF);
3390         radeon_ring_write(ring, 0);
3391         radeon_ring_write(ring, 10); /* poll interval */
3392         /* EVENT_WRITE_EOP - flush caches, send int */
3393         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3394         radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3395         radeon_ring_write(ring, lower_32_bits(addr));
3396         radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3397         radeon_ring_write(ring, fence->seq);
3398         radeon_ring_write(ring, 0);
3399 }
3400
3401 /*
3402  * IB stuff
3403  */
3404 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3405 {
3406         struct radeon_ring *ring = &rdev->ring[ib->ring];
3407         unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3408         u32 header;
3409
3410         if (ib->is_const_ib) {
3411                 /* set switch buffer packet before const IB */
3412                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3413                 radeon_ring_write(ring, 0);
3414
3415                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3416         } else {
3417                 u32 next_rptr;
3418                 if (ring->rptr_save_reg) {
3419                         next_rptr = ring->wptr + 3 + 4 + 8;
3420                         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3421                         radeon_ring_write(ring, ((ring->rptr_save_reg -
3422                                                   PACKET3_SET_CONFIG_REG_START) >> 2));
3423                         radeon_ring_write(ring, next_rptr);
3424                 } else if (rdev->wb.enabled) {
3425                         next_rptr = ring->wptr + 5 + 4 + 8;
3426                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3427                         radeon_ring_write(ring, (1 << 8));
3428                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3429                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3430                         radeon_ring_write(ring, next_rptr);
3431                 }
3432
3433                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3434         }
3435
3436         radeon_ring_write(ring, header);
3437         radeon_ring_write(ring,
3438 #ifdef __BIG_ENDIAN
3439                           (2 << 0) |
3440 #endif
3441                           (ib->gpu_addr & 0xFFFFFFFC));
3442         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3443         radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
3444
3445         if (!ib->is_const_ib) {
3446                 /* flush read cache over gart for this vmid */
3447                 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3448                 radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3449                 radeon_ring_write(ring, vm_id);
3450                 radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3451                 radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3452                                   PACKET3_TC_ACTION_ENA |
3453                                   PACKET3_SH_KCACHE_ACTION_ENA |
3454                                   PACKET3_SH_ICACHE_ACTION_ENA);
3455                 radeon_ring_write(ring, 0xFFFFFFFF);
3456                 radeon_ring_write(ring, 0);
3457                 radeon_ring_write(ring, 10); /* poll interval */
3458         }
3459 }
3460
3461 /*
3462  * CP.
3463  */
3464 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3465 {
3466         if (enable)
3467                 WREG32(CP_ME_CNTL, 0);
3468         else {
3469                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3470                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3471                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3472                 WREG32(SCRATCH_UMSK, 0);
3473                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3474                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3475                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3476         }
3477         udelay(50);
3478 }
3479
3480 static int si_cp_load_microcode(struct radeon_device *rdev)
3481 {
3482         int i;
3483
3484         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3485                 return -EINVAL;
3486
3487         si_cp_enable(rdev, false);
3488
3489         if (rdev->new_fw) {
3490                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3491                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3492                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3493                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3494                 const struct gfx_firmware_header_v1_0 *me_hdr =
3495                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3496                 const __le32 *fw_data;
3497                 u32 fw_size;
3498
3499                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3500                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3501                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3502
3503                 /* PFP */
3504                 fw_data = (const __le32 *)
3505                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3506                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3507                 WREG32(CP_PFP_UCODE_ADDR, 0);
3508                 for (i = 0; i < fw_size; i++)
3509                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3510                 WREG32(CP_PFP_UCODE_ADDR, 0);
3511
3512                 /* CE */
3513                 fw_data = (const __le32 *)
3514                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3515                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3516                 WREG32(CP_CE_UCODE_ADDR, 0);
3517                 for (i = 0; i < fw_size; i++)
3518                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3519                 WREG32(CP_CE_UCODE_ADDR, 0);
3520
3521                 /* ME */
3522                 fw_data = (const __be32 *)
3523                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3524                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3525                 WREG32(CP_ME_RAM_WADDR, 0);
3526                 for (i = 0; i < fw_size; i++)
3527                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3528                 WREG32(CP_ME_RAM_WADDR, 0);
3529         } else {
3530                 const __be32 *fw_data;
3531
3532                 /* PFP */
3533                 fw_data = (const __be32 *)rdev->pfp_fw->data;
3534                 WREG32(CP_PFP_UCODE_ADDR, 0);
3535                 for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3536                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3537                 WREG32(CP_PFP_UCODE_ADDR, 0);
3538
3539                 /* CE */
3540                 fw_data = (const __be32 *)rdev->ce_fw->data;
3541                 WREG32(CP_CE_UCODE_ADDR, 0);
3542                 for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3543                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3544                 WREG32(CP_CE_UCODE_ADDR, 0);
3545
3546                 /* ME */
3547                 fw_data = (const __be32 *)rdev->me_fw->data;
3548                 WREG32(CP_ME_RAM_WADDR, 0);
3549                 for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3550                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3551                 WREG32(CP_ME_RAM_WADDR, 0);
3552         }
3553
3554         WREG32(CP_PFP_UCODE_ADDR, 0);
3555         WREG32(CP_CE_UCODE_ADDR, 0);
3556         WREG32(CP_ME_RAM_WADDR, 0);
3557         WREG32(CP_ME_RAM_RADDR, 0);
3558         return 0;
3559 }
3560
3561 static int si_cp_start(struct radeon_device *rdev)
3562 {
3563         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3564         int r, i;
3565
3566         r = radeon_ring_lock(rdev, ring, 7 + 4);
3567         if (r) {
3568                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3569                 return r;
3570         }
3571         /* init the CP */
3572         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3573         radeon_ring_write(ring, 0x1);
3574         radeon_ring_write(ring, 0x0);
3575         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3576         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3577         radeon_ring_write(ring, 0);
3578         radeon_ring_write(ring, 0);
3579
3580         /* init the CE partitions */
3581         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3582         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3583         radeon_ring_write(ring, 0xc000);
3584         radeon_ring_write(ring, 0xe000);
3585         radeon_ring_unlock_commit(rdev, ring, false);
3586
3587         si_cp_enable(rdev, true);
3588
3589         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3590         if (r) {
3591                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3592                 return r;
3593         }
3594
3595         /* setup clear context state */
3596         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3597         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3598
3599         for (i = 0; i < si_default_size; i++)
3600                 radeon_ring_write(ring, si_default_state[i]);
3601
3602         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3603         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3604
3605         /* set clear context state */
3606         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3607         radeon_ring_write(ring, 0);
3608
3609         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3610         radeon_ring_write(ring, 0x00000316);
3611         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3612         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3613
3614         radeon_ring_unlock_commit(rdev, ring, false);
3615
3616         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3617                 ring = &rdev->ring[i];
3618                 r = radeon_ring_lock(rdev, ring, 2);
3619
3620                 /* clear the compute context state */
3621                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3622                 radeon_ring_write(ring, 0);
3623
3624                 radeon_ring_unlock_commit(rdev, ring, false);
3625         }
3626
3627         return 0;
3628 }
3629
3630 static void si_cp_fini(struct radeon_device *rdev)
3631 {
3632         struct radeon_ring *ring;
3633         si_cp_enable(rdev, false);
3634
3635         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3636         radeon_ring_fini(rdev, ring);
3637         radeon_scratch_free(rdev, ring->rptr_save_reg);
3638
3639         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3640         radeon_ring_fini(rdev, ring);
3641         radeon_scratch_free(rdev, ring->rptr_save_reg);
3642
3643         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3644         radeon_ring_fini(rdev, ring);
3645         radeon_scratch_free(rdev, ring->rptr_save_reg);
3646 }
3647
3648 static int si_cp_resume(struct radeon_device *rdev)
3649 {
3650         struct radeon_ring *ring;
3651         u32 tmp;
3652         u32 rb_bufsz;
3653         int r;
3654
3655         si_enable_gui_idle_interrupt(rdev, false);
3656
3657         WREG32(CP_SEM_WAIT_TIMER, 0x0);
3658         WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3659
3660         /* Set the write pointer delay */
3661         WREG32(CP_RB_WPTR_DELAY, 0);
3662
3663         WREG32(CP_DEBUG, 0);
3664         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3665
3666         /* ring 0 - compute and gfx */
3667         /* Set ring buffer size */
3668         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3669         rb_bufsz = order_base_2(ring->ring_size / 8);
3670         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3671 #ifdef __BIG_ENDIAN
3672         tmp |= BUF_SWAP_32BIT;
3673 #endif
3674         WREG32(CP_RB0_CNTL, tmp);
3675
3676         /* Initialize the ring buffer's read and write pointers */
3677         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3678         ring->wptr = 0;
3679         WREG32(CP_RB0_WPTR, ring->wptr);
3680
3681         /* set the wb address whether it's enabled or not */
3682         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3683         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3684
3685         if (rdev->wb.enabled)
3686                 WREG32(SCRATCH_UMSK, 0xff);
3687         else {
3688                 tmp |= RB_NO_UPDATE;
3689                 WREG32(SCRATCH_UMSK, 0);
3690         }
3691
3692         mdelay(1);
3693         WREG32(CP_RB0_CNTL, tmp);
3694
3695         WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3696
3697         /* ring1  - compute only */
3698         /* Set ring buffer size */
3699         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3700         rb_bufsz = order_base_2(ring->ring_size / 8);
3701         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3702 #ifdef __BIG_ENDIAN
3703         tmp |= BUF_SWAP_32BIT;
3704 #endif
3705         WREG32(CP_RB1_CNTL, tmp);
3706
3707         /* Initialize the ring buffer's read and write pointers */
3708         WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3709         ring->wptr = 0;
3710         WREG32(CP_RB1_WPTR, ring->wptr);
3711
3712         /* set the wb address whether it's enabled or not */
3713         WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3714         WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3715
3716         mdelay(1);
3717         WREG32(CP_RB1_CNTL, tmp);
3718
3719         WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3720
3721         /* ring2 - compute only */
3722         /* Set ring buffer size */
3723         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3724         rb_bufsz = order_base_2(ring->ring_size / 8);
3725         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3726 #ifdef __BIG_ENDIAN
3727         tmp |= BUF_SWAP_32BIT;
3728 #endif
3729         WREG32(CP_RB2_CNTL, tmp);
3730
3731         /* Initialize the ring buffer's read and write pointers */
3732         WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3733         ring->wptr = 0;
3734         WREG32(CP_RB2_WPTR, ring->wptr);
3735
3736         /* set the wb address whether it's enabled or not */
3737         WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3738         WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3739
3740         mdelay(1);
3741         WREG32(CP_RB2_CNTL, tmp);
3742
3743         WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3744
3745         /* start the rings */
3746         si_cp_start(rdev);
3747         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3748         rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3749         rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3750         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3751         if (r) {
3752                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3753                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3754                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3755                 return r;
3756         }
3757         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3758         if (r) {
3759                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3760         }
3761         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3762         if (r) {
3763                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3764         }
3765
3766         si_enable_gui_idle_interrupt(rdev, true);
3767
3768         if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3769                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3770
3771         return 0;
3772 }
3773
3774 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3775 {
3776         u32 reset_mask = 0;
3777         u32 tmp;
3778
3779         /* GRBM_STATUS */
3780         tmp = RREG32(GRBM_STATUS);
3781         if (tmp & (PA_BUSY | SC_BUSY |
3782                    BCI_BUSY | SX_BUSY |
3783                    TA_BUSY | VGT_BUSY |
3784                    DB_BUSY | CB_BUSY |
3785                    GDS_BUSY | SPI_BUSY |
3786                    IA_BUSY | IA_BUSY_NO_DMA))
3787                 reset_mask |= RADEON_RESET_GFX;
3788
3789         if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3790                    CP_BUSY | CP_COHERENCY_BUSY))
3791                 reset_mask |= RADEON_RESET_CP;
3792
3793         if (tmp & GRBM_EE_BUSY)
3794                 reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3795
3796         /* GRBM_STATUS2 */
3797         tmp = RREG32(GRBM_STATUS2);
3798         if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3799                 reset_mask |= RADEON_RESET_RLC;
3800
3801         /* DMA_STATUS_REG 0 */
3802         tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3803         if (!(tmp & DMA_IDLE))
3804                 reset_mask |= RADEON_RESET_DMA;
3805
3806         /* DMA_STATUS_REG 1 */
3807         tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3808         if (!(tmp & DMA_IDLE))
3809                 reset_mask |= RADEON_RESET_DMA1;
3810
3811         /* SRBM_STATUS2 */
3812         tmp = RREG32(SRBM_STATUS2);
3813         if (tmp & DMA_BUSY)
3814                 reset_mask |= RADEON_RESET_DMA;
3815
3816         if (tmp & DMA1_BUSY)
3817                 reset_mask |= RADEON_RESET_DMA1;
3818
3819         /* SRBM_STATUS */
3820         tmp = RREG32(SRBM_STATUS);
3821
3822         if (tmp & IH_BUSY)
3823                 reset_mask |= RADEON_RESET_IH;
3824
3825         if (tmp & SEM_BUSY)
3826                 reset_mask |= RADEON_RESET_SEM;
3827
3828         if (tmp & GRBM_RQ_PENDING)
3829                 reset_mask |= RADEON_RESET_GRBM;
3830
3831         if (tmp & VMC_BUSY)
3832                 reset_mask |= RADEON_RESET_VMC;
3833
3834         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3835                    MCC_BUSY | MCD_BUSY))
3836                 reset_mask |= RADEON_RESET_MC;
3837
3838         if (evergreen_is_display_hung(rdev))
3839                 reset_mask |= RADEON_RESET_DISPLAY;
3840
3841         /* VM_L2_STATUS */
3842         tmp = RREG32(VM_L2_STATUS);
3843         if (tmp & L2_BUSY)
3844                 reset_mask |= RADEON_RESET_VMC;
3845
3846         /* Skip MC reset as it's mostly likely not hung, just busy */
3847         if (reset_mask & RADEON_RESET_MC) {
3848                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3849                 reset_mask &= ~RADEON_RESET_MC;
3850         }
3851
3852         return reset_mask;
3853 }
3854
3855 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3856 {
3857         struct evergreen_mc_save save;
3858         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3859         u32 tmp;
3860
3861         if (reset_mask == 0)
3862                 return;
3863
3864         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3865
3866         evergreen_print_gpu_status_regs(rdev);
3867         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3868                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3869         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3870                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3871
3872         /* disable PG/CG */
3873         si_fini_pg(rdev);
3874         si_fini_cg(rdev);
3875
3876         /* stop the rlc */
3877         si_rlc_stop(rdev);
3878
3879         /* Disable CP parsing/prefetching */
3880         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3881
3882         if (reset_mask & RADEON_RESET_DMA) {
3883                 /* dma0 */
3884                 tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3885                 tmp &= ~DMA_RB_ENABLE;
3886                 WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3887         }
3888         if (reset_mask & RADEON_RESET_DMA1) {
3889                 /* dma1 */
3890                 tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3891                 tmp &= ~DMA_RB_ENABLE;
3892                 WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3893         }
3894
3895         udelay(50);
3896
3897         evergreen_mc_stop(rdev, &save);
3898         if (evergreen_mc_wait_for_idle(rdev)) {
3899                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3900         }
3901
3902         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3903                 grbm_soft_reset = SOFT_RESET_CB |
3904                         SOFT_RESET_DB |
3905                         SOFT_RESET_GDS |
3906                         SOFT_RESET_PA |
3907                         SOFT_RESET_SC |
3908                         SOFT_RESET_BCI |
3909                         SOFT_RESET_SPI |
3910                         SOFT_RESET_SX |
3911                         SOFT_RESET_TC |
3912                         SOFT_RESET_TA |
3913                         SOFT_RESET_VGT |
3914                         SOFT_RESET_IA;
3915         }
3916
3917         if (reset_mask & RADEON_RESET_CP) {
3918                 grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3919
3920                 srbm_soft_reset |= SOFT_RESET_GRBM;
3921         }
3922
3923         if (reset_mask & RADEON_RESET_DMA)
3924                 srbm_soft_reset |= SOFT_RESET_DMA;
3925
3926         if (reset_mask & RADEON_RESET_DMA1)
3927                 srbm_soft_reset |= SOFT_RESET_DMA1;
3928
3929         if (reset_mask & RADEON_RESET_DISPLAY)
3930                 srbm_soft_reset |= SOFT_RESET_DC;
3931
3932         if (reset_mask & RADEON_RESET_RLC)
3933                 grbm_soft_reset |= SOFT_RESET_RLC;
3934
3935         if (reset_mask & RADEON_RESET_SEM)
3936                 srbm_soft_reset |= SOFT_RESET_SEM;
3937
3938         if (reset_mask & RADEON_RESET_IH)
3939                 srbm_soft_reset |= SOFT_RESET_IH;
3940
3941         if (reset_mask & RADEON_RESET_GRBM)
3942                 srbm_soft_reset |= SOFT_RESET_GRBM;
3943
3944         if (reset_mask & RADEON_RESET_VMC)
3945                 srbm_soft_reset |= SOFT_RESET_VMC;
3946
3947         if (reset_mask & RADEON_RESET_MC)
3948                 srbm_soft_reset |= SOFT_RESET_MC;
3949
3950         if (grbm_soft_reset) {
3951                 tmp = RREG32(GRBM_SOFT_RESET);
3952                 tmp |= grbm_soft_reset;
3953                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3954                 WREG32(GRBM_SOFT_RESET, tmp);
3955                 tmp = RREG32(GRBM_SOFT_RESET);
3956
3957                 udelay(50);
3958
3959                 tmp &= ~grbm_soft_reset;
3960                 WREG32(GRBM_SOFT_RESET, tmp);
3961                 tmp = RREG32(GRBM_SOFT_RESET);
3962         }
3963
3964         if (srbm_soft_reset) {
3965                 tmp = RREG32(SRBM_SOFT_RESET);
3966                 tmp |= srbm_soft_reset;
3967                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3968                 WREG32(SRBM_SOFT_RESET, tmp);
3969                 tmp = RREG32(SRBM_SOFT_RESET);
3970
3971                 udelay(50);
3972
3973                 tmp &= ~srbm_soft_reset;
3974                 WREG32(SRBM_SOFT_RESET, tmp);
3975                 tmp = RREG32(SRBM_SOFT_RESET);
3976         }
3977
3978         /* Wait a little for things to settle down */
3979         udelay(50);
3980
3981         evergreen_mc_resume(rdev, &save);
3982         udelay(50);
3983
3984         evergreen_print_gpu_status_regs(rdev);
3985 }
3986
3987 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3988 {
3989         u32 tmp, i;
3990
3991         tmp = RREG32(CG_SPLL_FUNC_CNTL);
3992         tmp |= SPLL_BYPASS_EN;
3993         WREG32(CG_SPLL_FUNC_CNTL, tmp);
3994
3995         tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3996         tmp |= SPLL_CTLREQ_CHG;
3997         WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3998
3999         for (i = 0; i < rdev->usec_timeout; i++) {
4000                 if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
4001                         break;
4002                 udelay(1);
4003         }
4004
4005         tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
4006         tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
4007         WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
4008
4009         tmp = RREG32(MPLL_CNTL_MODE);
4010         tmp &= ~MPLL_MCLK_SEL;
4011         WREG32(MPLL_CNTL_MODE, tmp);
4012 }
4013
4014 static void si_spll_powerdown(struct radeon_device *rdev)
4015 {
4016         u32 tmp;
4017
4018         tmp = RREG32(SPLL_CNTL_MODE);
4019         tmp |= SPLL_SW_DIR_CONTROL;
4020         WREG32(SPLL_CNTL_MODE, tmp);
4021
4022         tmp = RREG32(CG_SPLL_FUNC_CNTL);
4023         tmp |= SPLL_RESET;
4024         WREG32(CG_SPLL_FUNC_CNTL, tmp);
4025
4026         tmp = RREG32(CG_SPLL_FUNC_CNTL);
4027         tmp |= SPLL_SLEEP;
4028         WREG32(CG_SPLL_FUNC_CNTL, tmp);
4029
4030         tmp = RREG32(SPLL_CNTL_MODE);
4031         tmp &= ~SPLL_SW_DIR_CONTROL;
4032         WREG32(SPLL_CNTL_MODE, tmp);
4033 }
4034
4035 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
4036 {
4037         struct evergreen_mc_save save;
4038         u32 tmp, i;
4039
4040         dev_info(rdev->dev, "GPU pci config reset\n");
4041
4042         /* disable dpm? */
4043
4044         /* disable cg/pg */
4045         si_fini_pg(rdev);
4046         si_fini_cg(rdev);
4047
4048         /* Disable CP parsing/prefetching */
4049         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4050         /* dma0 */
4051         tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
4052         tmp &= ~DMA_RB_ENABLE;
4053         WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
4054         /* dma1 */
4055         tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
4056         tmp &= ~DMA_RB_ENABLE;
4057         WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
4058         /* XXX other engines? */
4059
4060         /* halt the rlc, disable cp internal ints */
4061         si_rlc_stop(rdev);
4062
4063         udelay(50);
4064
4065         /* disable mem access */
4066         evergreen_mc_stop(rdev, &save);
4067         if (evergreen_mc_wait_for_idle(rdev)) {
4068                 dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
4069         }
4070
4071         /* set mclk/sclk to bypass */
4072         si_set_clk_bypass_mode(rdev);
4073         /* powerdown spll */
4074         si_spll_powerdown(rdev);
4075         /* disable BM */
4076         pci_clear_master(rdev->pdev);
4077         /* reset */
4078         radeon_pci_config_reset(rdev);
4079         /* wait for asic to come out of reset */
4080         for (i = 0; i < rdev->usec_timeout; i++) {
4081                 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
4082                         break;
4083                 udelay(1);
4084         }
4085 }
4086
4087 int si_asic_reset(struct radeon_device *rdev, bool hard)
4088 {
4089         u32 reset_mask;
4090
4091         if (hard) {
4092                 si_gpu_pci_config_reset(rdev);
4093                 return 0;
4094         }
4095
4096         reset_mask = si_gpu_check_soft_reset(rdev);
4097
4098         if (reset_mask)
4099                 r600_set_bios_scratch_engine_hung(rdev, true);
4100
4101         /* try soft reset */
4102         si_gpu_soft_reset(rdev, reset_mask);
4103
4104         reset_mask = si_gpu_check_soft_reset(rdev);
4105
4106         /* try pci config reset */
4107         if (reset_mask && radeon_hard_reset)
4108                 si_gpu_pci_config_reset(rdev);
4109
4110         reset_mask = si_gpu_check_soft_reset(rdev);
4111
4112         if (!reset_mask)
4113                 r600_set_bios_scratch_engine_hung(rdev, false);
4114
4115         return 0;
4116 }
4117
4118 /**
4119  * si_gfx_is_lockup - Check if the GFX engine is locked up
4120  *
4121  * @rdev: radeon_device pointer
4122  * @ring: radeon_ring structure holding ring information
4123  *
4124  * Check if the GFX engine is locked up.
4125  * Returns true if the engine appears to be locked up, false if not.
4126  */
4127 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4128 {
4129         u32 reset_mask = si_gpu_check_soft_reset(rdev);
4130
4131         if (!(reset_mask & (RADEON_RESET_GFX |
4132                             RADEON_RESET_COMPUTE |
4133                             RADEON_RESET_CP))) {
4134                 radeon_ring_lockup_update(rdev, ring);
4135                 return false;
4136         }
4137         return radeon_ring_test_lockup(rdev, ring);
4138 }
4139
4140 /* MC */
4141 static void si_mc_program(struct radeon_device *rdev)
4142 {
4143         struct evergreen_mc_save save;
4144         u32 tmp;
4145         int i, j;
4146
4147         /* Initialize HDP */
4148         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4149                 WREG32((0x2c14 + j), 0x00000000);
4150                 WREG32((0x2c18 + j), 0x00000000);
4151                 WREG32((0x2c1c + j), 0x00000000);
4152                 WREG32((0x2c20 + j), 0x00000000);
4153                 WREG32((0x2c24 + j), 0x00000000);
4154         }
4155         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4156
4157         evergreen_mc_stop(rdev, &save);
4158         if (radeon_mc_wait_for_idle(rdev)) {
4159                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4160         }
4161         if (!ASIC_IS_NODCE(rdev))
4162                 /* Lockout access through VGA aperture*/
4163                 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4164         /* Update configuration */
4165         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4166                rdev->mc.vram_start >> 12);
4167         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4168                rdev->mc.vram_end >> 12);
4169         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4170                rdev->vram_scratch.gpu_addr >> 12);
4171         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4172         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4173         WREG32(MC_VM_FB_LOCATION, tmp);
4174         /* XXX double check these! */
4175         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4176         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4177         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4178         WREG32(MC_VM_AGP_BASE, 0);
4179         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4180         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4181         if (radeon_mc_wait_for_idle(rdev)) {
4182                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4183         }
4184         evergreen_mc_resume(rdev, &save);
4185         if (!ASIC_IS_NODCE(rdev)) {
4186                 /* we need to own VRAM, so turn off the VGA renderer here
4187                  * to stop it overwriting our objects */
4188                 rv515_vga_render_disable(rdev);
4189         }
4190 }
4191
4192 void si_vram_gtt_location(struct radeon_device *rdev,
4193                           struct radeon_mc *mc)
4194 {
4195         if (mc->mc_vram_size > 0xFFC0000000ULL) {
4196                 /* leave room for at least 1024M GTT */
4197                 dev_warn(rdev->dev, "limiting VRAM\n");
4198                 mc->real_vram_size = 0xFFC0000000ULL;
4199                 mc->mc_vram_size = 0xFFC0000000ULL;
4200         }
4201         radeon_vram_location(rdev, &rdev->mc, 0);
4202         rdev->mc.gtt_base_align = 0;
4203         radeon_gtt_location(rdev, mc);
4204 }
4205
4206 static int si_mc_init(struct radeon_device *rdev)
4207 {
4208         u32 tmp;
4209         int chansize, numchan;
4210
4211         /* Get VRAM informations */
4212         rdev->mc.vram_is_ddr = true;
4213         tmp = RREG32(MC_ARB_RAMCFG);
4214         if (tmp & CHANSIZE_OVERRIDE) {
4215                 chansize = 16;
4216         } else if (tmp & CHANSIZE_MASK) {
4217                 chansize = 64;
4218         } else {
4219                 chansize = 32;
4220         }
4221         tmp = RREG32(MC_SHARED_CHMAP);
4222         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4223         case 0:
4224         default:
4225                 numchan = 1;
4226                 break;
4227         case 1:
4228                 numchan = 2;
4229                 break;
4230         case 2:
4231                 numchan = 4;
4232                 break;
4233         case 3:
4234                 numchan = 8;
4235                 break;
4236         case 4:
4237                 numchan = 3;
4238                 break;
4239         case 5:
4240                 numchan = 6;
4241                 break;
4242         case 6:
4243                 numchan = 10;
4244                 break;
4245         case 7:
4246                 numchan = 12;
4247                 break;
4248         case 8:
4249                 numchan = 16;
4250                 break;
4251         }
4252         rdev->mc.vram_width = numchan * chansize;
4253         /* Could aper size report 0 ? */
4254         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4255         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4256         /* size in MB on si */
4257         tmp = RREG32(CONFIG_MEMSIZE);
4258         /* some boards may have garbage in the upper 16 bits */
4259         if (tmp & 0xffff0000) {
4260                 DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4261                 if (tmp & 0xffff)
4262                         tmp &= 0xffff;
4263         }
4264         rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4265         rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4266         rdev->mc.visible_vram_size = rdev->mc.aper_size;
4267         si_vram_gtt_location(rdev, &rdev->mc);
4268         radeon_update_bandwidth_info(rdev);
4269
4270         return 0;
4271 }
4272
4273 /*
4274  * GART
4275  */
4276 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4277 {
4278         /* flush hdp cache */
4279         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4280
4281         /* bits 0-15 are the VM contexts0-15 */
4282         WREG32(VM_INVALIDATE_REQUEST, 1);
4283 }
4284
4285 static int si_pcie_gart_enable(struct radeon_device *rdev)
4286 {
4287         int r, i;
4288
4289         if (rdev->gart.robj == NULL) {
4290                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4291                 return -EINVAL;
4292         }
4293         r = radeon_gart_table_vram_pin(rdev);
4294         if (r)
4295                 return r;
4296         /* Setup TLB control */
4297         WREG32(MC_VM_MX_L1_TLB_CNTL,
4298                (0xA << 7) |
4299                ENABLE_L1_TLB |
4300                ENABLE_L1_FRAGMENT_PROCESSING |
4301                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4302                ENABLE_ADVANCED_DRIVER_MODEL |
4303                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4304         /* Setup L2 cache */
4305         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4306                ENABLE_L2_FRAGMENT_PROCESSING |
4307                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4308                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4309                EFFECTIVE_L2_QUEUE_SIZE(7) |
4310                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4311         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4312         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4313                BANK_SELECT(4) |
4314                L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4315         /* setup context0 */
4316         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4317         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4318         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4319         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4320                         (u32)(rdev->dummy_page.addr >> 12));
4321         WREG32(VM_CONTEXT0_CNTL2, 0);
4322         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4323                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4324
4325         WREG32(0x15D4, 0);
4326         WREG32(0x15D8, 0);
4327         WREG32(0x15DC, 0);
4328
4329         /* empty context1-15 */
4330         /* set vm size, must be a multiple of 4 */
4331         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4332         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
4333         /* Assign the pt base to something valid for now; the pts used for
4334          * the VMs are determined by the application and setup and assigned
4335          * on the fly in the vm part of radeon_gart.c
4336          */
4337         for (i = 1; i < 16; i++) {
4338                 if (i < 8)
4339                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4340                                rdev->vm_manager.saved_table_addr[i]);
4341                 else
4342                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4343                                rdev->vm_manager.saved_table_addr[i]);
4344         }
4345
4346         /* enable context1-15 */
4347         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4348                (u32)(rdev->dummy_page.addr >> 12));
4349         WREG32(VM_CONTEXT1_CNTL2, 4);
4350         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4351                                 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4352                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4353                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4354                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4355                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4356                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4357                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4358                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4359                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4360                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4361                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4362                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4363                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4364
4365         si_pcie_gart_tlb_flush(rdev);
4366         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4367                  (unsigned)(rdev->mc.gtt_size >> 20),
4368                  (unsigned long long)rdev->gart.table_addr);
4369         rdev->gart.ready = true;
4370         return 0;
4371 }
4372
4373 static void si_pcie_gart_disable(struct radeon_device *rdev)
4374 {
4375         unsigned i;
4376
4377         for (i = 1; i < 16; ++i) {
4378                 uint32_t reg;
4379                 if (i < 8)
4380                         reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4381                 else
4382                         reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4383                 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4384         }
4385
4386         /* Disable all tables */
4387         WREG32(VM_CONTEXT0_CNTL, 0);
4388         WREG32(VM_CONTEXT1_CNTL, 0);
4389         /* Setup TLB control */
4390         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4391                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4392         /* Setup L2 cache */
4393         WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4394                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4395                EFFECTIVE_L2_QUEUE_SIZE(7) |
4396                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4397         WREG32(VM_L2_CNTL2, 0);
4398         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4399                L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4400         radeon_gart_table_vram_unpin(rdev);
4401 }
4402
/**
 * si_pcie_gart_fini - tear down the PCIE GART (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART first, then calls radeon_gart_table_vram_free()
 * and radeon_gart_fini(), in that order.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	/* the hardware must stop using the table before it is released */
	si_pcie_gart_disable(rdev);

	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4409
4410 /* vm parser */
4411 static bool si_vm_reg_valid(u32 reg)
4412 {
4413         /* context regs are fine */
4414         if (reg >= 0x28000)
4415                 return true;
4416
4417         /* shader regs are also fine */
4418         if (reg >= 0xB000 && reg < 0xC000)
4419                 return true;
4420
4421         /* check config regs */
4422         switch (reg) {
4423         case GRBM_GFX_INDEX:
4424         case CP_STRMOUT_CNTL:
4425         case VGT_VTX_VECT_EJECT_REG:
4426         case VGT_CACHE_INVALIDATION:
4427         case VGT_ESGS_RING_SIZE:
4428         case VGT_GSVS_RING_SIZE:
4429         case VGT_GS_VERTEX_REUSE:
4430         case VGT_PRIMITIVE_TYPE:
4431         case VGT_INDEX_TYPE:
4432         case VGT_NUM_INDICES:
4433         case VGT_NUM_INSTANCES:
4434         case VGT_TF_RING_SIZE:
4435         case VGT_HS_OFFCHIP_PARAM:
4436         case VGT_TF_MEMORY_BASE:
4437         case PA_CL_ENHANCE:
4438         case PA_SU_LINE_STIPPLE_VALUE:
4439         case PA_SC_LINE_STIPPLE_STATE:
4440         case PA_SC_ENHANCE:
4441         case SQC_CACHES:
4442         case SPI_STATIC_THREAD_MGMT_1:
4443         case SPI_STATIC_THREAD_MGMT_2:
4444         case SPI_STATIC_THREAD_MGMT_3:
4445         case SPI_PS_MAX_WAVE_ID:
4446         case SPI_CONFIG_CNTL:
4447         case SPI_CONFIG_CNTL_1:
4448         case TA_CNTL_AUX:
4449         case TA_CS_BC_BASE_ADDR:
4450                 return true;
4451         default:
4452                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4453                 return false;
4454         }
4455 }
4456
4457 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4458                                   u32 *ib, struct radeon_cs_packet *pkt)
4459 {
4460         switch (pkt->opcode) {
4461         case PACKET3_NOP:
4462         case PACKET3_SET_BASE:
4463         case PACKET3_SET_CE_DE_COUNTERS:
4464         case PACKET3_LOAD_CONST_RAM:
4465         case PACKET3_WRITE_CONST_RAM:
4466         case PACKET3_WRITE_CONST_RAM_OFFSET:
4467         case PACKET3_DUMP_CONST_RAM:
4468         case PACKET3_INCREMENT_CE_COUNTER:
4469         case PACKET3_WAIT_ON_DE_COUNTER:
4470         case PACKET3_CE_WRITE:
4471                 break;
4472         default:
4473                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4474                 return -EINVAL;
4475         }
4476         return 0;
4477 }
4478
4479 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4480 {
4481         u32 start_reg, reg, i;
4482         u32 command = ib[idx + 4];
4483         u32 info = ib[idx + 1];
4484         u32 idx_value = ib[idx];
4485         if (command & PACKET3_CP_DMA_CMD_SAS) {
4486                 /* src address space is register */
4487                 if (((info & 0x60000000) >> 29) == 0) {
4488                         start_reg = idx_value << 2;
4489                         if (command & PACKET3_CP_DMA_CMD_SAIC) {
4490                                 reg = start_reg;
4491                                 if (!si_vm_reg_valid(reg)) {
4492                                         DRM_ERROR("CP DMA Bad SRC register\n");
4493                                         return -EINVAL;
4494                                 }
4495                         } else {
4496                                 for (i = 0; i < (command & 0x1fffff); i++) {
4497                                         reg = start_reg + (4 * i);
4498                                         if (!si_vm_reg_valid(reg)) {
4499                                                 DRM_ERROR("CP DMA Bad SRC register\n");
4500                                                 return -EINVAL;
4501                                         }
4502                                 }
4503                         }
4504                 }
4505         }
4506         if (command & PACKET3_CP_DMA_CMD_DAS) {
4507                 /* dst address space is register */
4508                 if (((info & 0x00300000) >> 20) == 0) {
4509                         start_reg = ib[idx + 2];
4510                         if (command & PACKET3_CP_DMA_CMD_DAIC) {
4511                                 reg = start_reg;
4512                                 if (!si_vm_reg_valid(reg)) {
4513                                         DRM_ERROR("CP DMA Bad DST register\n");
4514                                         return -EINVAL;
4515                                 }
4516                         } else {
4517                                 for (i = 0; i < (command & 0x1fffff); i++) {
4518                                         reg = start_reg + (4 * i);
4519                                 if (!si_vm_reg_valid(reg)) {
4520                                                 DRM_ERROR("CP DMA Bad DST register\n");
4521                                                 return -EINVAL;
4522                                         }
4523                                 }
4524                         }
4525                 }
4526         }
4527         return 0;
4528 }
4529
4530 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4531                                    u32 *ib, struct radeon_cs_packet *pkt)
4532 {
4533         int r;
4534         u32 idx = pkt->idx + 1;
4535         u32 idx_value = ib[idx];
4536         u32 start_reg, end_reg, reg, i;
4537
4538         switch (pkt->opcode) {
4539         case PACKET3_NOP:
4540         case PACKET3_SET_BASE:
4541         case PACKET3_CLEAR_STATE:
4542         case PACKET3_INDEX_BUFFER_SIZE:
4543         case PACKET3_DISPATCH_DIRECT:
4544         case PACKET3_DISPATCH_INDIRECT:
4545         case PACKET3_ALLOC_GDS:
4546         case PACKET3_WRITE_GDS_RAM:
4547         case PACKET3_ATOMIC_GDS:
4548         case PACKET3_ATOMIC:
4549         case PACKET3_OCCLUSION_QUERY:
4550         case PACKET3_SET_PREDICATION:
4551         case PACKET3_COND_EXEC:
4552         case PACKET3_PRED_EXEC:
4553         case PACKET3_DRAW_INDIRECT:
4554         case PACKET3_DRAW_INDEX_INDIRECT:
4555         case PACKET3_INDEX_BASE:
4556         case PACKET3_DRAW_INDEX_2:
4557         case PACKET3_CONTEXT_CONTROL:
4558         case PACKET3_INDEX_TYPE:
4559         case PACKET3_DRAW_INDIRECT_MULTI:
4560         case PACKET3_DRAW_INDEX_AUTO:
4561         case PACKET3_DRAW_INDEX_IMMD:
4562         case PACKET3_NUM_INSTANCES:
4563         case PACKET3_DRAW_INDEX_MULTI_AUTO:
4564         case PACKET3_STRMOUT_BUFFER_UPDATE:
4565         case PACKET3_DRAW_INDEX_OFFSET_2:
4566         case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4567         case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4568         case PACKET3_MPEG_INDEX:
4569         case PACKET3_WAIT_REG_MEM:
4570         case PACKET3_MEM_WRITE:
4571         case PACKET3_PFP_SYNC_ME:
4572         case PACKET3_SURFACE_SYNC:
4573         case PACKET3_EVENT_WRITE:
4574         case PACKET3_EVENT_WRITE_EOP:
4575         case PACKET3_EVENT_WRITE_EOS:
4576         case PACKET3_SET_CONTEXT_REG:
4577         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4578         case PACKET3_SET_SH_REG:
4579         case PACKET3_SET_SH_REG_OFFSET:
4580         case PACKET3_INCREMENT_DE_COUNTER:
4581         case PACKET3_WAIT_ON_CE_COUNTER:
4582         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4583         case PACKET3_ME_WRITE:
4584                 break;
4585         case PACKET3_COPY_DATA:
4586                 if ((idx_value & 0xf00) == 0) {
4587                         reg = ib[idx + 3] * 4;
4588                         if (!si_vm_reg_valid(reg))
4589                                 return -EINVAL;
4590                 }
4591                 break;
4592         case PACKET3_WRITE_DATA:
4593                 if ((idx_value & 0xf00) == 0) {
4594                         start_reg = ib[idx + 1] * 4;
4595                         if (idx_value & 0x10000) {
4596                                 if (!si_vm_reg_valid(start_reg))
4597                                         return -EINVAL;
4598                         } else {
4599                                 for (i = 0; i < (pkt->count - 2); i++) {
4600                                         reg = start_reg + (4 * i);
4601                                         if (!si_vm_reg_valid(reg))
4602                                                 return -EINVAL;
4603                                 }
4604                         }
4605                 }
4606                 break;
4607         case PACKET3_COND_WRITE:
4608                 if (idx_value & 0x100) {
4609                         reg = ib[idx + 5] * 4;
4610                         if (!si_vm_reg_valid(reg))
4611                                 return -EINVAL;
4612                 }
4613                 break;
4614         case PACKET3_COPY_DW:
4615                 if (idx_value & 0x2) {
4616                         reg = ib[idx + 3] * 4;
4617                         if (!si_vm_reg_valid(reg))
4618                                 return -EINVAL;
4619                 }
4620                 break;
4621         case PACKET3_SET_CONFIG_REG:
4622                 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4623                 end_reg = 4 * pkt->count + start_reg - 4;
4624                 if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4625                     (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4626                     (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4627                         DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4628                         return -EINVAL;
4629                 }
4630                 for (i = 0; i < pkt->count; i++) {
4631                         reg = start_reg + (4 * i);
4632                         if (!si_vm_reg_valid(reg))
4633                                 return -EINVAL;
4634                 }
4635                 break;
4636         case PACKET3_CP_DMA:
4637                 r = si_vm_packet3_cp_dma_check(ib, idx);
4638                 if (r)
4639                         return r;
4640                 break;
4641         default:
4642                 DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4643                 return -EINVAL;
4644         }
4645         return 0;
4646 }
4647
4648 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4649                                        u32 *ib, struct radeon_cs_packet *pkt)
4650 {
4651         int r;
4652         u32 idx = pkt->idx + 1;
4653         u32 idx_value = ib[idx];
4654         u32 start_reg, reg, i;
4655
4656         switch (pkt->opcode) {
4657         case PACKET3_NOP:
4658         case PACKET3_SET_BASE:
4659         case PACKET3_CLEAR_STATE:
4660         case PACKET3_DISPATCH_DIRECT:
4661         case PACKET3_DISPATCH_INDIRECT:
4662         case PACKET3_ALLOC_GDS:
4663         case PACKET3_WRITE_GDS_RAM:
4664         case PACKET3_ATOMIC_GDS:
4665         case PACKET3_ATOMIC:
4666         case PACKET3_OCCLUSION_QUERY:
4667         case PACKET3_SET_PREDICATION:
4668         case PACKET3_COND_EXEC:
4669         case PACKET3_PRED_EXEC:
4670         case PACKET3_CONTEXT_CONTROL:
4671         case PACKET3_STRMOUT_BUFFER_UPDATE:
4672         case PACKET3_WAIT_REG_MEM:
4673         case PACKET3_MEM_WRITE:
4674         case PACKET3_PFP_SYNC_ME:
4675         case PACKET3_SURFACE_SYNC:
4676         case PACKET3_EVENT_WRITE:
4677         case PACKET3_EVENT_WRITE_EOP:
4678         case PACKET3_EVENT_WRITE_EOS:
4679         case PACKET3_SET_CONTEXT_REG:
4680         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4681         case PACKET3_SET_SH_REG:
4682         case PACKET3_SET_SH_REG_OFFSET:
4683         case PACKET3_INCREMENT_DE_COUNTER:
4684         case PACKET3_WAIT_ON_CE_COUNTER:
4685         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4686         case PACKET3_ME_WRITE:
4687                 break;
4688         case PACKET3_COPY_DATA:
4689                 if ((idx_value & 0xf00) == 0) {
4690                         reg = ib[idx + 3] * 4;
4691                         if (!si_vm_reg_valid(reg))
4692                                 return -EINVAL;
4693                 }
4694                 break;
4695         case PACKET3_WRITE_DATA:
4696                 if ((idx_value & 0xf00) == 0) {
4697                         start_reg = ib[idx + 1] * 4;
4698                         if (idx_value & 0x10000) {
4699                                 if (!si_vm_reg_valid(start_reg))
4700                                         return -EINVAL;
4701                         } else {
4702                                 for (i = 0; i < (pkt->count - 2); i++) {
4703                                         reg = start_reg + (4 * i);
4704                                         if (!si_vm_reg_valid(reg))
4705                                                 return -EINVAL;
4706                                 }
4707                         }
4708                 }
4709                 break;
4710         case PACKET3_COND_WRITE:
4711                 if (idx_value & 0x100) {
4712                         reg = ib[idx + 5] * 4;
4713                         if (!si_vm_reg_valid(reg))
4714                                 return -EINVAL;
4715                 }
4716                 break;
4717         case PACKET3_COPY_DW:
4718                 if (idx_value & 0x2) {
4719                         reg = ib[idx + 3] * 4;
4720                         if (!si_vm_reg_valid(reg))
4721                                 return -EINVAL;
4722                 }
4723                 break;
4724         case PACKET3_CP_DMA:
4725                 r = si_vm_packet3_cp_dma_check(ib, idx);
4726                 if (r)
4727                         return r;
4728                 break;
4729         default:
4730                 DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4731                 return -EINVAL;
4732         }
4733         return 0;
4734 }
4735
4736 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4737 {
4738         int ret = 0;
4739         u32 idx = 0, i;
4740         struct radeon_cs_packet pkt;
4741
4742         do {
4743                 pkt.idx = idx;
4744                 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4745                 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4746                 pkt.one_reg_wr = 0;
4747                 switch (pkt.type) {
4748                 case RADEON_PACKET_TYPE0:
4749                         dev_err(rdev->dev, "Packet0 not allowed!\n");
4750                         ret = -EINVAL;
4751                         break;
4752                 case RADEON_PACKET_TYPE2:
4753                         idx += 1;
4754                         break;
4755                 case RADEON_PACKET_TYPE3:
4756                         pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4757                         if (ib->is_const_ib)
4758                                 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4759                         else {
4760                                 switch (ib->ring) {
4761                                 case RADEON_RING_TYPE_GFX_INDEX:
4762                                         ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4763                                         break;
4764                                 case CAYMAN_RING_TYPE_CP1_INDEX:
4765                                 case CAYMAN_RING_TYPE_CP2_INDEX:
4766                                         ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4767                                         break;
4768                                 default:
4769                                         dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4770                                         ret = -EINVAL;
4771                                         break;
4772                                 }
4773                         }
4774                         idx += pkt.count + 2;
4775                         break;
4776                 default:
4777                         dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4778                         ret = -EINVAL;
4779                         break;
4780                 }
4781                 if (ret) {
4782                         for (i = 0; i < ib->length_dw; i++) {
4783                                 if (i == idx)
4784                                         printk("\t0x%08x <---\n", ib->ptr[i]);
4785                                 else
4786                                         printk("\t0x%08x\n", ib->ptr[i]);
4787                         }
4788                         break;
4789                 }
4790         } while (idx < ib->length_dw);
4791
4792         return ret;
4793 }
4794
4795 /*
4796  * vm
4797  */
4798 int si_vm_init(struct radeon_device *rdev)
4799 {
4800         /* number of VMs */
4801         rdev->vm_manager.nvm = 16;
4802         /* base offset of vram pages */
4803         rdev->vm_manager.vram_base_offset = 0;
4804
4805         return 0;
4806 }
4807
/**
 * si_vm_fini - counterpart of si_vm_init() (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Intentionally empty: si_vm_init() only stores two values and acquires
 * nothing that would have to be released here.
 */
void si_vm_fini(struct radeon_device *rdev)
{
}
4811
4812 /**
4813  * si_vm_decode_fault - print human readable fault info
4814  *
4815  * @rdev: radeon_device pointer
4816  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4817  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4818  *
4819  * Print human readable fault information (SI).
4820  */
4821 static void si_vm_decode_fault(struct radeon_device *rdev,
4822                                u32 status, u32 addr)
4823 {
4824         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4825         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4826         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4827         char *block;
4828
4829         if (rdev->family == CHIP_TAHITI) {
4830                 switch (mc_id) {
4831                 case 160:
4832                 case 144:
4833                 case 96:
4834                 case 80:
4835                 case 224:
4836                 case 208:
4837                 case 32:
4838                 case 16:
4839                         block = "CB";
4840                         break;
4841                 case 161:
4842                 case 145:
4843                 case 97:
4844                 case 81:
4845                 case 225:
4846                 case 209:
4847                 case 33:
4848                 case 17:
4849                         block = "CB_FMASK";
4850                         break;
4851                 case 162:
4852                 case 146:
4853                 case 98:
4854                 case 82:
4855                 case 226:
4856                 case 210:
4857                 case 34:
4858                 case 18:
4859                         block = "CB_CMASK";
4860                         break;
4861                 case 163:
4862                 case 147:
4863                 case 99:
4864                 case 83:
4865                 case 227:
4866                 case 211:
4867                 case 35:
4868                 case 19:
4869                         block = "CB_IMMED";
4870                         break;
4871                 case 164:
4872                 case 148:
4873                 case 100:
4874                 case 84:
4875                 case 228:
4876                 case 212:
4877                 case 36:
4878                 case 20:
4879                         block = "DB";
4880                         break;
4881                 case 165:
4882                 case 149:
4883                 case 101:
4884                 case 85:
4885                 case 229:
4886                 case 213:
4887                 case 37:
4888                 case 21:
4889                         block = "DB_HTILE";
4890                         break;
4891                 case 167:
4892                 case 151:
4893                 case 103:
4894                 case 87:
4895                 case 231:
4896                 case 215:
4897                 case 39:
4898                 case 23:
4899                         block = "DB_STEN";
4900                         break;
4901                 case 72:
4902                 case 68:
4903                 case 64:
4904                 case 8:
4905                 case 4:
4906                 case 0:
4907                 case 136:
4908                 case 132:
4909                 case 128:
4910                 case 200:
4911                 case 196:
4912                 case 192:
4913                         block = "TC";
4914                         break;
4915                 case 112:
4916                 case 48:
4917                         block = "CP";
4918                         break;
4919                 case 49:
4920                 case 177:
4921                 case 50:
4922                 case 178:
4923                         block = "SH";
4924                         break;
4925                 case 53:
4926                 case 190:
4927                         block = "VGT";
4928                         break;
4929                 case 117:
4930                         block = "IH";
4931                         break;
4932                 case 51:
4933                 case 115:
4934                         block = "RLC";
4935                         break;
4936                 case 119:
4937                 case 183:
4938                         block = "DMA0";
4939                         break;
4940                 case 61:
4941                         block = "DMA1";
4942                         break;
4943                 case 248:
4944                 case 120:
4945                         block = "HDP";
4946                         break;
4947                 default:
4948                         block = "unknown";
4949                         break;
4950                 }
4951         } else {
4952                 switch (mc_id) {
4953                 case 32:
4954                 case 16:
4955                 case 96:
4956                 case 80:
4957                 case 160:
4958                 case 144:
4959                 case 224:
4960                 case 208:
4961                         block = "CB";
4962                         break;
4963                 case 33:
4964                 case 17:
4965                 case 97:
4966                 case 81:
4967                 case 161:
4968                 case 145:
4969                 case 225:
4970                 case 209:
4971                         block = "CB_FMASK";
4972                         break;
4973                 case 34:
4974                 case 18:
4975                 case 98:
4976                 case 82:
4977                 case 162:
4978                 case 146:
4979                 case 226:
4980                 case 210:
4981                         block = "CB_CMASK";
4982                         break;
4983                 case 35:
4984                 case 19:
4985                 case 99:
4986                 case 83:
4987                 case 163:
4988                 case 147:
4989                 case 227:
4990                 case 211:
4991                         block = "CB_IMMED";
4992                         break;
4993                 case 36:
4994                 case 20:
4995                 case 100:
4996                 case 84:
4997                 case 164:
4998                 case 148:
4999                 case 228:
5000                 case 212:
5001                         block = "DB";
5002                         break;
5003                 case 37:
5004                 case 21:
5005                 case 101:
5006                 case 85:
5007                 case 165:
5008                 case 149:
5009                 case 229:
5010                 case 213:
5011                         block = "DB_HTILE";
5012                         break;
5013                 case 39:
5014                 case 23:
5015                 case 103:
5016                 case 87:
5017                 case 167:
5018                 case 151:
5019                 case 231:
5020                 case 215:
5021                         block = "DB_STEN";
5022                         break;
5023                 case 72:
5024                 case 68:
5025                 case 8:
5026                 case 4:
5027                 case 136:
5028                 case 132:
5029                 case 200:
5030                 case 196:
5031                         block = "TC";
5032                         break;
5033                 case 112:
5034                 case 48:
5035                         block = "CP";
5036                         break;
5037                 case 49:
5038                 case 177:
5039                 case 50:
5040                 case 178:
5041                         block = "SH";
5042                         break;
5043                 case 53:
5044                         block = "VGT";
5045                         break;
5046                 case 117:
5047                         block = "IH";
5048                         break;
5049                 case 51:
5050                 case 115:
5051                         block = "RLC";
5052                         break;
5053                 case 119:
5054                 case 183:
5055                         block = "DMA0";
5056                         break;
5057                 case 61:
5058                         block = "DMA1";
5059                         break;
5060                 case 248:
5061                 case 120:
5062                         block = "HDP";
5063                         break;
5064                 default:
5065                         block = "unknown";
5066                         break;
5067                 }
5068         }
5069
5070         printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5071                protections, vmid, addr,
5072                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5073                block, mc_id);
5074 }
5075
5076 void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5077                  unsigned vm_id, uint64_t pd_addr)
5078 {
5079         /* write new base address */
5080         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5081         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5082                                  WRITE_DATA_DST_SEL(0)));
5083
5084         if (vm_id < 8) {
5085                 radeon_ring_write(ring,
5086                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5087         } else {
5088                 radeon_ring_write(ring,
5089                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5090         }
5091         radeon_ring_write(ring, 0);
5092         radeon_ring_write(ring, pd_addr >> 12);
5093
5094         /* flush hdp cache */
5095         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5096         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5097                                  WRITE_DATA_DST_SEL(0)));
5098         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5099         radeon_ring_write(ring, 0);
5100         radeon_ring_write(ring, 0x1);
5101
5102         /* bits 0-15 are the VM contexts0-15 */
5103         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5104         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5105                                  WRITE_DATA_DST_SEL(0)));
5106         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5107         radeon_ring_write(ring, 0);
5108         radeon_ring_write(ring, 1 << vm_id);
5109
5110         /* wait for the invalidate to complete */
5111         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5112         radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
5113                                  WAIT_REG_MEM_ENGINE(0))); /* me */
5114         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5115         radeon_ring_write(ring, 0);
5116         radeon_ring_write(ring, 0); /* ref */
5117         radeon_ring_write(ring, 0); /* mask */
5118         radeon_ring_write(ring, 0x20); /* poll interval */
5119
5120         /* sync PFP to ME, otherwise we might get invalid PFP reads */
5121         radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5122         radeon_ring_write(ring, 0x0);
5123 }
5124
5125 /*
5126  *  Power and clock gating
5127  */
5128 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5129 {
5130         int i;
5131
5132         for (i = 0; i < rdev->usec_timeout; i++) {
5133                 if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5134                         break;
5135                 udelay(1);
5136         }
5137
5138         for (i = 0; i < rdev->usec_timeout; i++) {
5139                 if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5140                         break;
5141                 udelay(1);
5142         }
5143 }
5144
5145 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5146                                          bool enable)
5147 {
5148         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5149         u32 mask;
5150         int i;
5151
5152         if (enable)
5153                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5154         else
5155                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5156         WREG32(CP_INT_CNTL_RING0, tmp);
5157
5158         if (!enable) {
5159                 /* read a gfx register */
5160                 tmp = RREG32(DB_DEPTH_INFO);
5161
5162                 mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5163                 for (i = 0; i < rdev->usec_timeout; i++) {
5164                         if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5165                                 break;
5166                         udelay(1);
5167                 }
5168         }
5169 }
5170
5171 static void si_set_uvd_dcm(struct radeon_device *rdev,
5172                            bool sw_mode)
5173 {
5174         u32 tmp, tmp2;
5175
5176         tmp = RREG32(UVD_CGC_CTRL);
5177         tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5178         tmp |= DCM | CG_DT(1) | CLK_OD(4);
5179
5180         if (sw_mode) {
5181                 tmp &= ~0x7ffff800;
5182                 tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5183         } else {
5184                 tmp |= 0x7ffff800;
5185                 tmp2 = 0;
5186         }
5187
5188         WREG32(UVD_CGC_CTRL, tmp);
5189         WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5190 }
5191
5192 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5193 {
5194         bool hw_mode = true;
5195
5196         if (hw_mode) {
5197                 si_set_uvd_dcm(rdev, false);
5198         } else {
5199                 u32 tmp = RREG32(UVD_CGC_CTRL);
5200                 tmp &= ~DCM;
5201                 WREG32(UVD_CGC_CTRL, tmp);
5202         }
5203 }
5204
5205 static u32 si_halt_rlc(struct radeon_device *rdev)
5206 {
5207         u32 data, orig;
5208
5209         orig = data = RREG32(RLC_CNTL);
5210
5211         if (data & RLC_ENABLE) {
5212                 data &= ~RLC_ENABLE;
5213                 WREG32(RLC_CNTL, data);
5214
5215                 si_wait_for_rlc_serdes(rdev);
5216         }
5217
5218         return orig;
5219 }
5220
5221 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5222 {
5223         u32 tmp;
5224
5225         tmp = RREG32(RLC_CNTL);
5226         if (tmp != rlc)
5227                 WREG32(RLC_CNTL, rlc);
5228 }
5229
5230 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5231 {
5232         u32 data, orig;
5233
5234         orig = data = RREG32(DMA_PG);
5235         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5236                 data |= PG_CNTL_ENABLE;
5237         else
5238                 data &= ~PG_CNTL_ENABLE;
5239         if (orig != data)
5240                 WREG32(DMA_PG, data);
5241 }
5242
5243 static void si_init_dma_pg(struct radeon_device *rdev)
5244 {
5245         u32 tmp;
5246
5247         WREG32(DMA_PGFSM_WRITE,  0x00002000);
5248         WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5249
5250         for (tmp = 0; tmp < 5; tmp++)
5251                 WREG32(DMA_PGFSM_WRITE, 0);
5252 }
5253
5254 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5255                                bool enable)
5256 {
5257         u32 tmp;
5258
5259         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5260                 tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5261                 WREG32(RLC_TTOP_D, tmp);
5262
5263                 tmp = RREG32(RLC_PG_CNTL);
5264                 tmp |= GFX_PG_ENABLE;
5265                 WREG32(RLC_PG_CNTL, tmp);
5266
5267                 tmp = RREG32(RLC_AUTO_PG_CTRL);
5268                 tmp |= AUTO_PG_EN;
5269                 WREG32(RLC_AUTO_PG_CTRL, tmp);
5270         } else {
5271                 tmp = RREG32(RLC_AUTO_PG_CTRL);
5272                 tmp &= ~AUTO_PG_EN;
5273                 WREG32(RLC_AUTO_PG_CTRL, tmp);
5274
5275                 tmp = RREG32(DB_RENDER_CONTROL);
5276         }
5277 }
5278
5279 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5280 {
5281         u32 tmp;
5282
5283         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5284
5285         tmp = RREG32(RLC_PG_CNTL);
5286         tmp |= GFX_PG_SRC;
5287         WREG32(RLC_PG_CNTL, tmp);
5288
5289         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5290
5291         tmp = RREG32(RLC_AUTO_PG_CTRL);
5292
5293         tmp &= ~GRBM_REG_SGIT_MASK;
5294         tmp |= GRBM_REG_SGIT(0x700);
5295         tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5296         WREG32(RLC_AUTO_PG_CTRL, tmp);
5297 }
5298
5299 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5300 {
5301         u32 mask = 0, tmp, tmp1;
5302         int i;
5303
5304         si_select_se_sh(rdev, se, sh);
5305         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5306         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5307         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5308
5309         tmp &= 0xffff0000;
5310
5311         tmp |= tmp1;
5312         tmp >>= 16;
5313
5314         for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5315                 mask <<= 1;
5316                 mask |= 1;
5317         }
5318
5319         return (~tmp) & mask;
5320 }
5321
5322 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5323 {
5324         u32 i, j, k, active_cu_number = 0;
5325         u32 mask, counter, cu_bitmap;
5326         u32 tmp = 0;
5327
5328         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5329                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5330                         mask = 1;
5331                         cu_bitmap = 0;
5332                         counter  = 0;
5333                         for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5334                                 if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5335                                         if (counter < 2)
5336                                                 cu_bitmap |= mask;
5337                                         counter++;
5338                                 }
5339                                 mask <<= 1;
5340                         }
5341
5342                         active_cu_number += counter;
5343                         tmp |= (cu_bitmap << (i * 16 + j * 8));
5344                 }
5345         }
5346
5347         WREG32(RLC_PG_AO_CU_MASK, tmp);
5348
5349         tmp = RREG32(RLC_MAX_PG_CU);
5350         tmp &= ~MAX_PU_CU_MASK;
5351         tmp |= MAX_PU_CU(active_cu_number);
5352         WREG32(RLC_MAX_PG_CU, tmp);
5353 }
5354
5355 static void si_enable_cgcg(struct radeon_device *rdev,
5356                            bool enable)
5357 {
5358         u32 data, orig, tmp;
5359
5360         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5361
5362         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5363                 si_enable_gui_idle_interrupt(rdev, true);
5364
5365                 WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5366
5367                 tmp = si_halt_rlc(rdev);
5368
5369                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5370                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5371                 WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5372
5373                 si_wait_for_rlc_serdes(rdev);
5374
5375                 si_update_rlc(rdev, tmp);
5376
5377                 WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5378
5379                 data |= CGCG_EN | CGLS_EN;
5380         } else {
5381                 si_enable_gui_idle_interrupt(rdev, false);
5382
5383                 RREG32(CB_CGTT_SCLK_CTRL);
5384                 RREG32(CB_CGTT_SCLK_CTRL);
5385                 RREG32(CB_CGTT_SCLK_CTRL);
5386                 RREG32(CB_CGTT_SCLK_CTRL);
5387
5388                 data &= ~(CGCG_EN | CGLS_EN);
5389         }
5390
5391         if (orig != data)
5392                 WREG32(RLC_CGCG_CGLS_CTRL, data);
5393 }
5394
5395 static void si_enable_mgcg(struct radeon_device *rdev,
5396                            bool enable)
5397 {
5398         u32 data, orig, tmp = 0;
5399
5400         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5401                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5402                 data = 0x96940200;
5403                 if (orig != data)
5404                         WREG32(CGTS_SM_CTRL_REG, data);
5405
5406                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5407                         orig = data = RREG32(CP_MEM_SLP_CNTL);
5408                         data |= CP_MEM_LS_EN;
5409                         if (orig != data)
5410                                 WREG32(CP_MEM_SLP_CNTL, data);
5411                 }
5412
5413                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5414                 data &= 0xffffffc0;
5415                 if (orig != data)
5416                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5417
5418                 tmp = si_halt_rlc(rdev);
5419
5420                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5421                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5422                 WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5423
5424                 si_update_rlc(rdev, tmp);
5425         } else {
5426                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5427                 data |= 0x00000003;
5428                 if (orig != data)
5429                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5430
5431                 data = RREG32(CP_MEM_SLP_CNTL);
5432                 if (data & CP_MEM_LS_EN) {
5433                         data &= ~CP_MEM_LS_EN;
5434                         WREG32(CP_MEM_SLP_CNTL, data);
5435                 }
5436                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5437                 data |= LS_OVERRIDE | OVERRIDE;
5438                 if (orig != data)
5439                         WREG32(CGTS_SM_CTRL_REG, data);
5440
5441                 tmp = si_halt_rlc(rdev);
5442
5443                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5444                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5445                 WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5446
5447                 si_update_rlc(rdev, tmp);
5448         }
5449 }
5450
5451 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5452                                bool enable)
5453 {
5454         u32 orig, data, tmp;
5455
5456         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5457                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5458                 tmp |= 0x3fff;
5459                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5460
5461                 orig = data = RREG32(UVD_CGC_CTRL);
5462                 data |= DCM;
5463                 if (orig != data)
5464                         WREG32(UVD_CGC_CTRL, data);
5465
5466                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5467                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5468         } else {
5469                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5470                 tmp &= ~0x3fff;
5471                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5472
5473                 orig = data = RREG32(UVD_CGC_CTRL);
5474                 data &= ~DCM;
5475                 if (orig != data)
5476                         WREG32(UVD_CGC_CTRL, data);
5477
5478                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5479                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5480         }
5481 }
5482
5483 static const u32 mc_cg_registers[] =
5484 {
5485         MC_HUB_MISC_HUB_CG,
5486         MC_HUB_MISC_SIP_CG,
5487         MC_HUB_MISC_VM_CG,
5488         MC_XPB_CLK_GAT,
5489         ATC_MISC_CG,
5490         MC_CITF_MISC_WR_CG,
5491         MC_CITF_MISC_RD_CG,
5492         MC_CITF_MISC_VM_CG,
5493         VM_L2_CG,
5494 };
5495
5496 static void si_enable_mc_ls(struct radeon_device *rdev,
5497                             bool enable)
5498 {
5499         int i;
5500         u32 orig, data;
5501
5502         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5503                 orig = data = RREG32(mc_cg_registers[i]);
5504                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5505                         data |= MC_LS_ENABLE;
5506                 else
5507                         data &= ~MC_LS_ENABLE;
5508                 if (data != orig)
5509                         WREG32(mc_cg_registers[i], data);
5510         }
5511 }
5512
5513 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5514                                bool enable)
5515 {
5516         int i;
5517         u32 orig, data;
5518
5519         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5520                 orig = data = RREG32(mc_cg_registers[i]);
5521                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5522                         data |= MC_CG_ENABLE;
5523                 else
5524                         data &= ~MC_CG_ENABLE;
5525                 if (data != orig)
5526                         WREG32(mc_cg_registers[i], data);
5527         }
5528 }
5529
5530 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5531                                bool enable)
5532 {
5533         u32 orig, data, offset;
5534         int i;
5535
5536         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5537                 for (i = 0; i < 2; i++) {
5538                         if (i == 0)
5539                                 offset = DMA0_REGISTER_OFFSET;
5540                         else
5541                                 offset = DMA1_REGISTER_OFFSET;
5542                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5543                         data &= ~MEM_POWER_OVERRIDE;
5544                         if (data != orig)
5545                                 WREG32(DMA_POWER_CNTL + offset, data);
5546                         WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5547                 }
5548         } else {
5549                 for (i = 0; i < 2; i++) {
5550                         if (i == 0)
5551                                 offset = DMA0_REGISTER_OFFSET;
5552                         else
5553                                 offset = DMA1_REGISTER_OFFSET;
5554                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5555                         data |= MEM_POWER_OVERRIDE;
5556                         if (data != orig)
5557                                 WREG32(DMA_POWER_CNTL + offset, data);
5558
5559                         orig = data = RREG32(DMA_CLK_CTRL + offset);
5560                         data = 0xff000000;
5561                         if (data != orig)
5562                                 WREG32(DMA_CLK_CTRL + offset, data);
5563                 }
5564         }
5565 }
5566
5567 static void si_enable_bif_mgls(struct radeon_device *rdev,
5568                                bool enable)
5569 {
5570         u32 orig, data;
5571
5572         orig = data = RREG32_PCIE(PCIE_CNTL2);
5573
5574         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5575                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5576                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5577         else
5578                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5579                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5580
5581         if (orig != data)
5582                 WREG32_PCIE(PCIE_CNTL2, data);
5583 }
5584
5585 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5586                                bool enable)
5587 {
5588         u32 orig, data;
5589
5590         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5591
5592         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5593                 data &= ~CLOCK_GATING_DIS;
5594         else
5595                 data |= CLOCK_GATING_DIS;
5596
5597         if (orig != data)
5598                 WREG32(HDP_HOST_PATH_CNTL, data);
5599 }
5600
5601 static void si_enable_hdp_ls(struct radeon_device *rdev,
5602                              bool enable)
5603 {
5604         u32 orig, data;
5605
5606         orig = data = RREG32(HDP_MEM_POWER_LS);
5607
5608         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5609                 data |= HDP_LS_ENABLE;
5610         else
5611                 data &= ~HDP_LS_ENABLE;
5612
5613         if (orig != data)
5614                 WREG32(HDP_MEM_POWER_LS, data);
5615 }
5616
5617 static void si_update_cg(struct radeon_device *rdev,
5618                          u32 block, bool enable)
5619 {
5620         if (block & RADEON_CG_BLOCK_GFX) {
5621                 si_enable_gui_idle_interrupt(rdev, false);
5622                 /* order matters! */
5623                 if (enable) {
5624                         si_enable_mgcg(rdev, true);
5625                         si_enable_cgcg(rdev, true);
5626                 } else {
5627                         si_enable_cgcg(rdev, false);
5628                         si_enable_mgcg(rdev, false);
5629                 }
5630                 si_enable_gui_idle_interrupt(rdev, true);
5631         }
5632
5633         if (block & RADEON_CG_BLOCK_MC) {
5634                 si_enable_mc_mgcg(rdev, enable);
5635                 si_enable_mc_ls(rdev, enable);
5636         }
5637
5638         if (block & RADEON_CG_BLOCK_SDMA) {
5639                 si_enable_dma_mgcg(rdev, enable);
5640         }
5641
5642         if (block & RADEON_CG_BLOCK_BIF) {
5643                 si_enable_bif_mgls(rdev, enable);
5644         }
5645
5646         if (block & RADEON_CG_BLOCK_UVD) {
5647                 if (rdev->has_uvd) {
5648                         si_enable_uvd_mgcg(rdev, enable);
5649                 }
5650         }
5651
5652         if (block & RADEON_CG_BLOCK_HDP) {
5653                 si_enable_hdp_mgcg(rdev, enable);
5654                 si_enable_hdp_ls(rdev, enable);
5655         }
5656 }
5657
5658 static void si_init_cg(struct radeon_device *rdev)
5659 {
5660         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5661                             RADEON_CG_BLOCK_MC |
5662                             RADEON_CG_BLOCK_SDMA |
5663                             RADEON_CG_BLOCK_BIF |
5664                             RADEON_CG_BLOCK_HDP), true);
5665         if (rdev->has_uvd) {
5666                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5667                 si_init_uvd_internal_cg(rdev);
5668         }
5669 }
5670
5671 static void si_fini_cg(struct radeon_device *rdev)
5672 {
5673         if (rdev->has_uvd) {
5674                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5675         }
5676         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5677                             RADEON_CG_BLOCK_MC |
5678                             RADEON_CG_BLOCK_SDMA |
5679                             RADEON_CG_BLOCK_BIF |
5680                             RADEON_CG_BLOCK_HDP), false);
5681 }
5682
5683 u32 si_get_csb_size(struct radeon_device *rdev)
5684 {
5685         u32 count = 0;
5686         const struct cs_section_def *sect = NULL;
5687         const struct cs_extent_def *ext = NULL;
5688
5689         if (rdev->rlc.cs_data == NULL)
5690                 return 0;
5691
5692         /* begin clear state */
5693         count += 2;
5694         /* context control state */
5695         count += 3;
5696
5697         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5698                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5699                         if (sect->id == SECT_CONTEXT)
5700                                 count += 2 + ext->reg_count;
5701                         else
5702                                 return 0;
5703                 }
5704         }
5705         /* pa_sc_raster_config */
5706         count += 3;
5707         /* end clear state */
5708         count += 2;
5709         /* clear state */
5710         count += 2;
5711
5712         return count;
5713 }
5714
5715 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5716 {
5717         u32 count = 0, i;
5718         const struct cs_section_def *sect = NULL;
5719         const struct cs_extent_def *ext = NULL;
5720
5721         if (rdev->rlc.cs_data == NULL)
5722                 return;
5723         if (buffer == NULL)
5724                 return;
5725
5726         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5727         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5728
5729         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5730         buffer[count++] = cpu_to_le32(0x80000000);
5731         buffer[count++] = cpu_to_le32(0x80000000);
5732
5733         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5734                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5735                         if (sect->id == SECT_CONTEXT) {
5736                                 buffer[count++] =
5737                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5738                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5739                                 for (i = 0; i < ext->reg_count; i++)
5740                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
5741                         } else {
5742                                 return;
5743                         }
5744                 }
5745         }
5746
5747         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5748         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5749         switch (rdev->family) {
5750         case CHIP_TAHITI:
5751         case CHIP_PITCAIRN:
5752                 buffer[count++] = cpu_to_le32(0x2a00126a);
5753                 break;
5754         case CHIP_VERDE:
5755                 buffer[count++] = cpu_to_le32(0x0000124a);
5756                 break;
5757         case CHIP_OLAND:
5758                 buffer[count++] = cpu_to_le32(0x00000082);
5759                 break;
5760         case CHIP_HAINAN:
5761                 buffer[count++] = cpu_to_le32(0x00000000);
5762                 break;
5763         default:
5764                 buffer[count++] = cpu_to_le32(0x00000000);
5765                 break;
5766         }
5767
5768         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5769         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5770
5771         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5772         buffer[count++] = cpu_to_le32(0);
5773 }
5774
5775 static void si_init_pg(struct radeon_device *rdev)
5776 {
5777         if (rdev->pg_flags) {
5778                 if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5779                         si_init_dma_pg(rdev);
5780                 }
5781                 si_init_ao_cu_mask(rdev);
5782                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5783                         si_init_gfx_cgpg(rdev);
5784                 } else {
5785                         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5786                         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5787                 }
5788                 si_enable_dma_pg(rdev, true);
5789                 si_enable_gfx_cgpg(rdev, true);
5790         } else {
5791                 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5792                 WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5793         }
5794 }
5795
5796 static void si_fini_pg(struct radeon_device *rdev)
5797 {
5798         if (rdev->pg_flags) {
5799                 si_enable_dma_pg(rdev, false);
5800                 si_enable_gfx_cgpg(rdev, false);
5801         }
5802 }
5803
5804 /*
5805  * RLC
5806  */
5807 void si_rlc_reset(struct radeon_device *rdev)
5808 {
5809         u32 tmp = RREG32(GRBM_SOFT_RESET);
5810
5811         tmp |= SOFT_RESET_RLC;
5812         WREG32(GRBM_SOFT_RESET, tmp);
5813         udelay(50);
5814         tmp &= ~SOFT_RESET_RLC;
5815         WREG32(GRBM_SOFT_RESET, tmp);
5816         udelay(50);
5817 }
5818
5819 static void si_rlc_stop(struct radeon_device *rdev)
5820 {
5821         WREG32(RLC_CNTL, 0);
5822
5823         si_enable_gui_idle_interrupt(rdev, false);
5824
5825         si_wait_for_rlc_serdes(rdev);
5826 }
5827
5828 static void si_rlc_start(struct radeon_device *rdev)
5829 {
5830         WREG32(RLC_CNTL, RLC_ENABLE);
5831
5832         si_enable_gui_idle_interrupt(rdev, true);
5833
5834         udelay(50);
5835 }
5836
5837 static bool si_lbpw_supported(struct radeon_device *rdev)
5838 {
5839         u32 tmp;
5840
5841         /* Enable LBPW only for DDR3 */
5842         tmp = RREG32(MC_SEQ_MISC0);
5843         if ((tmp & 0xF0000000) == 0xB0000000)
5844                 return true;
5845         return false;
5846 }
5847
5848 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5849 {
5850         u32 tmp;
5851
5852         tmp = RREG32(RLC_LB_CNTL);
5853         if (enable)
5854                 tmp |= LOAD_BALANCE_ENABLE;
5855         else
5856                 tmp &= ~LOAD_BALANCE_ENABLE;
5857         WREG32(RLC_LB_CNTL, tmp);
5858
5859         if (!enable) {
5860                 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5861                 WREG32(SPI_LB_CU_MASK, 0x00ff);
5862         }
5863 }
5864
5865 static int si_rlc_resume(struct radeon_device *rdev)
5866 {
5867         u32 i;
5868
5869         if (!rdev->rlc_fw)
5870                 return -EINVAL;
5871
5872         si_rlc_stop(rdev);
5873
5874         si_rlc_reset(rdev);
5875
5876         si_init_pg(rdev);
5877
5878         si_init_cg(rdev);
5879
5880         WREG32(RLC_RL_BASE, 0);
5881         WREG32(RLC_RL_SIZE, 0);
5882         WREG32(RLC_LB_CNTL, 0);
5883         WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5884         WREG32(RLC_LB_CNTR_INIT, 0);
5885         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5886
5887         WREG32(RLC_MC_CNTL, 0);
5888         WREG32(RLC_UCODE_CNTL, 0);
5889
5890         if (rdev->new_fw) {
5891                 const struct rlc_firmware_header_v1_0 *hdr =
5892                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5893                 u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5894                 const __le32 *fw_data = (const __le32 *)
5895                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5896
5897                 radeon_ucode_print_rlc_hdr(&hdr->header);
5898
5899                 for (i = 0; i < fw_size; i++) {
5900                         WREG32(RLC_UCODE_ADDR, i);
5901                         WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
5902                 }
5903         } else {
5904                 const __be32 *fw_data =
5905                         (const __be32 *)rdev->rlc_fw->data;
5906                 for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5907                         WREG32(RLC_UCODE_ADDR, i);
5908                         WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5909                 }
5910         }
5911         WREG32(RLC_UCODE_ADDR, 0);
5912
5913         si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5914
5915         si_rlc_start(rdev);
5916
5917         return 0;
5918 }
5919
5920 static void si_enable_interrupts(struct radeon_device *rdev)
5921 {
5922         u32 ih_cntl = RREG32(IH_CNTL);
5923         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5924
5925         ih_cntl |= ENABLE_INTR;
5926         ih_rb_cntl |= IH_RB_ENABLE;
5927         WREG32(IH_CNTL, ih_cntl);
5928         WREG32(IH_RB_CNTL, ih_rb_cntl);
5929         rdev->ih.enabled = true;
5930 }
5931
5932 static void si_disable_interrupts(struct radeon_device *rdev)
5933 {
5934         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5935         u32 ih_cntl = RREG32(IH_CNTL);
5936
5937         ih_rb_cntl &= ~IH_RB_ENABLE;
5938         ih_cntl &= ~ENABLE_INTR;
5939         WREG32(IH_RB_CNTL, ih_rb_cntl);
5940         WREG32(IH_CNTL, ih_cntl);
5941         /* set rptr, wptr to 0 */
5942         WREG32(IH_RB_RPTR, 0);
5943         WREG32(IH_RB_WPTR, 0);
5944         rdev->ih.enabled = false;
5945         rdev->ih.rptr = 0;
5946 }
5947
5948 static void si_disable_interrupt_state(struct radeon_device *rdev)
5949 {
5950         int i;
5951         u32 tmp;
5952
5953         tmp = RREG32(CP_INT_CNTL_RING0) &
5954                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5955         WREG32(CP_INT_CNTL_RING0, tmp);
5956         WREG32(CP_INT_CNTL_RING1, 0);
5957         WREG32(CP_INT_CNTL_RING2, 0);
5958         tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5959         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5960         tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5961         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5962         WREG32(GRBM_INT_CNTL, 0);
5963         WREG32(SRBM_INT_CNTL, 0);
5964         for (i = 0; i < rdev->num_crtc; i++)
5965                 WREG32(INT_MASK + crtc_offsets[i], 0);
5966         for (i = 0; i < rdev->num_crtc; i++)
5967                 WREG32(GRPH_INT_CONTROL + crtc_offsets[i], 0);
5968
5969         if (!ASIC_IS_NODCE(rdev)) {
5970                 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5971
5972                 for (i = 0; i < 6; i++)
5973                         WREG32_AND(DC_HPDx_INT_CONTROL(i),
5974                                    DC_HPDx_INT_POLARITY);
5975         }
5976 }
5977
5978 static int si_irq_init(struct radeon_device *rdev)
5979 {
5980         int ret = 0;
5981         int rb_bufsz;
5982         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5983
5984         /* allocate ring */
5985         ret = r600_ih_ring_alloc(rdev);
5986         if (ret)
5987                 return ret;
5988
5989         /* disable irqs */
5990         si_disable_interrupts(rdev);
5991
5992         /* init rlc */
5993         ret = si_rlc_resume(rdev);
5994         if (ret) {
5995                 r600_ih_ring_fini(rdev);
5996                 return ret;
5997         }
5998
5999         /* setup interrupt control */
6000         /* set dummy read address to dummy page address */
6001         WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
6002         interrupt_cntl = RREG32(INTERRUPT_CNTL);
6003         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6004          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6005          */
6006         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6007         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6008         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6009         WREG32(INTERRUPT_CNTL, interrupt_cntl);
6010
6011         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6012         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6013
6014         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6015                       IH_WPTR_OVERFLOW_CLEAR |
6016                       (rb_bufsz << 1));
6017
6018         if (rdev->wb.enabled)
6019                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6020
6021         /* set the writeback address whether it's enabled or not */
6022         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6023         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6024
6025         WREG32(IH_RB_CNTL, ih_rb_cntl);
6026
6027         /* set rptr, wptr to 0 */
6028         WREG32(IH_RB_RPTR, 0);
6029         WREG32(IH_RB_WPTR, 0);
6030
6031         /* Default settings for IH_CNTL (disabled at first) */
6032         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6033         /* RPTR_REARM only works if msi's are enabled */
6034         if (rdev->msi_enabled)
6035                 ih_cntl |= RPTR_REARM;
6036         WREG32(IH_CNTL, ih_cntl);
6037
6038         /* force the active interrupt state to all disabled */
6039         si_disable_interrupt_state(rdev);
6040
6041         pci_set_master(rdev->pdev);
6042
6043         /* enable irqs */
6044         si_enable_interrupts(rdev);
6045
6046         return ret;
6047 }
6048
6049 /* The order we write back each register here is important */
6050 int si_irq_set(struct radeon_device *rdev)
6051 {
6052         int i;
6053         u32 cp_int_cntl;
6054         u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
6055         u32 grbm_int_cntl = 0;
6056         u32 dma_cntl, dma_cntl1;
6057         u32 thermal_int = 0;
6058
6059         if (!rdev->irq.installed) {
6060                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6061                 return -EINVAL;
6062         }
6063         /* don't enable anything if the ih is disabled */
6064         if (!rdev->ih.enabled) {
6065                 si_disable_interrupts(rdev);
6066                 /* force the active interrupt state to all disabled */
6067                 si_disable_interrupt_state(rdev);
6068                 return 0;
6069         }
6070
6071         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6072                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6073
6074         dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6075         dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6076
6077         thermal_int = RREG32(CG_THERMAL_INT) &
6078                 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6079
6080         /* enable CP interrupts on all rings */
6081         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6082                 DRM_DEBUG("si_irq_set: sw int gfx\n");
6083                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6084         }
6085         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6086                 DRM_DEBUG("si_irq_set: sw int cp1\n");
6087                 cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
6088         }
6089         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6090                 DRM_DEBUG("si_irq_set: sw int cp2\n");
6091                 cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
6092         }
6093         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6094                 DRM_DEBUG("si_irq_set: sw int dma\n");
6095                 dma_cntl |= TRAP_ENABLE;
6096         }
6097
6098         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6099                 DRM_DEBUG("si_irq_set: sw int dma1\n");
6100                 dma_cntl1 |= TRAP_ENABLE;
6101         }
6102
6103         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6104         WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
6105         WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
6106
6107         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
6108         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
6109
6110         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6111
6112         if (rdev->irq.dpm_thermal) {
6113                 DRM_DEBUG("dpm thermal\n");
6114                 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6115         }
6116
6117         for (i = 0; i < rdev->num_crtc; i++) {
6118                 radeon_irq_kms_set_irq_n_enabled(
6119                     rdev, INT_MASK + crtc_offsets[i], VBLANK_INT_MASK,
6120                     rdev->irq.crtc_vblank_int[i] ||
6121                     atomic_read(&rdev->irq.pflip[i]), "vblank", i);
6122         }
6123
6124         for (i = 0; i < rdev->num_crtc; i++)
6125                 WREG32(GRPH_INT_CONTROL + crtc_offsets[i], GRPH_PFLIP_INT_MASK);
6126
6127         if (!ASIC_IS_NODCE(rdev)) {
6128                 for (i = 0; i < 6; i++) {
6129                         radeon_irq_kms_set_irq_n_enabled(
6130                             rdev, DC_HPDx_INT_CONTROL(i),
6131                             DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN,
6132                             rdev->irq.hpd[i], "HPD", i);
6133                 }
6134         }
6135
6136         WREG32(CG_THERMAL_INT, thermal_int);
6137
6138         /* posting read */
6139         RREG32(SRBM_STATUS);
6140
6141         return 0;
6142 }
6143
6144 /* The order we write back each register here is important */
6145 static inline void si_irq_ack(struct radeon_device *rdev)
6146 {
6147         int i, j;
6148         u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
6149         u32 *grph_int = rdev->irq.stat_regs.evergreen.grph_int;
6150
6151         if (ASIC_IS_NODCE(rdev))
6152                 return;
6153
6154         for (i = 0; i < 6; i++) {
6155                 disp_int[i] = RREG32(si_disp_int_status[i]);
6156                 if (i < rdev->num_crtc)
6157                         grph_int[i] = RREG32(GRPH_INT_STATUS + crtc_offsets[i]);
6158         }
6159
6160         /* We write back each interrupt register in pairs of two */
6161         for (i = 0; i < rdev->num_crtc; i += 2) {
6162                 for (j = i; j < (i + 2); j++) {
6163                         if (grph_int[j] & GRPH_PFLIP_INT_OCCURRED)
6164                                 WREG32(GRPH_INT_STATUS + crtc_offsets[j],
6165                                        GRPH_PFLIP_INT_CLEAR);
6166                 }
6167
6168                 for (j = i; j < (i + 2); j++) {
6169                         if (disp_int[j] & LB_D1_VBLANK_INTERRUPT)
6170                                 WREG32(VBLANK_STATUS + crtc_offsets[j],
6171                                        VBLANK_ACK);
6172                         if (disp_int[j] & LB_D1_VLINE_INTERRUPT)
6173                                 WREG32(VLINE_STATUS + crtc_offsets[j],
6174                                        VLINE_ACK);
6175                 }
6176         }
6177
6178         for (i = 0; i < 6; i++) {
6179                 if (disp_int[i] & DC_HPD1_INTERRUPT)
6180                         WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_INT_ACK);
6181         }
6182
6183         for (i = 0; i < 6; i++) {
6184                 if (disp_int[i] & DC_HPD1_RX_INTERRUPT)
6185                         WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_RX_INT_ACK);
6186         }
6187 }
6188
/*
 * si_irq_disable - stop interrupt delivery and quiesce all sources
 * @rdev: radeon device
 *
 * Disables interrupts, waits 1 ms, acknowledges whatever display
 * interrupts are still pending and then clears every per-source enable.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);

	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);

	/* finally switch off the individual sources */
	si_disable_interrupt_state(rdev);
}
6197
/*
 * si_irq_suspend - quiesce interrupts and stop the RLC
 * @rdev: radeon device
 *
 * Interrupts are disabled first, then the RLC is stopped.
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	/* interrupts go down before the RLC does */
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
6203
/*
 * si_irq_fini - tear down interrupt handling
 * @rdev: radeon device
 *
 * Performs the same steps as si_irq_suspend() and then releases the IH
 * ring.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hardware before the ring goes away */
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6209
6210 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6211 {
6212         u32 wptr, tmp;
6213
6214         if (rdev->wb.enabled)
6215                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6216         else
6217                 wptr = RREG32(IH_RB_WPTR);
6218
6219         if (wptr & RB_OVERFLOW) {
6220                 wptr &= ~RB_OVERFLOW;
6221                 /* When a ring buffer overflow happen start parsing interrupt
6222                  * from the last not overwritten vector (wptr + 16). Hopefully
6223                  * this should allow us to catchup.
6224                  */
6225                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6226                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6227                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6228                 tmp = RREG32(IH_RB_CNTL);
6229                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6230                 WREG32(IH_RB_CNTL, tmp);
6231         }
6232         return (wptr & rdev->ih.ptr_mask);
6233 }
6234
6235 /*        SI IV Ring
6236  * Each IV ring entry is 128 bits:
6237  * [7:0]    - interrupt source id
6238  * [31:8]   - reserved
6239  * [59:32]  - interrupt source data
6240  * [63:60]  - reserved
6241  * [71:64]  - RINGID
6242  * [79:72]  - VMID
6243  * [127:80] - reserved
6244  */
6245 int si_irq_process(struct radeon_device *rdev)
6246 {
6247         u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
6248         u32 crtc_idx, hpd_idx;
6249         u32 mask;
6250         u32 wptr;
6251         u32 rptr;
6252         u32 src_id, src_data, ring_id;
6253         u32 ring_index;
6254         bool queue_hotplug = false;
6255         bool queue_dp = false;
6256         bool queue_thermal = false;
6257         u32 status, addr;
6258         const char *event_name;
6259
6260         if (!rdev->ih.enabled || rdev->shutdown)
6261                 return IRQ_NONE;
6262
6263         wptr = si_get_ih_wptr(rdev);
6264
6265 restart_ih:
6266         /* is somebody else already processing irqs? */
6267         if (atomic_xchg(&rdev->ih.lock, 1))
6268                 return IRQ_NONE;
6269
6270         rptr = rdev->ih.rptr;
6271         DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6272
6273         /* Order reading of wptr vs. reading of IH ring data */
6274         rmb();
6275
6276         /* display interrupts */
6277         si_irq_ack(rdev);
6278
6279         while (rptr != wptr) {
6280                 /* wptr/rptr are in bytes! */
6281                 ring_index = rptr / 4;
6282                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6283                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6284                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6285
6286                 switch (src_id) {
6287                 case 1: /* D1 vblank/vline */
6288                 case 2: /* D2 vblank/vline */
6289                 case 3: /* D3 vblank/vline */
6290                 case 4: /* D4 vblank/vline */
6291                 case 5: /* D5 vblank/vline */
6292                 case 6: /* D6 vblank/vline */
6293                         crtc_idx = src_id - 1;
6294
6295                         if (src_data == 0) { /* vblank */
6296                                 mask = LB_D1_VBLANK_INTERRUPT;
6297                                 event_name = "vblank";
6298
6299                                 if (rdev->irq.crtc_vblank_int[crtc_idx]) {
6300                                         drm_handle_vblank(rdev->ddev, crtc_idx);
6301                                         rdev->pm.vblank_sync = true;
6302                                         wake_up(&rdev->irq.vblank_queue);
6303                                 }
6304                                 if (atomic_read(&rdev->irq.pflip[crtc_idx])) {
6305                                         radeon_crtc_handle_vblank(rdev,
6306                                                                   crtc_idx);
6307                                 }
6308
6309                         } else if (src_data == 1) { /* vline */
6310                                 mask = LB_D1_VLINE_INTERRUPT;
6311                                 event_name = "vline";
6312                         } else {
6313                                 DRM_DEBUG("Unhandled interrupt: %d %d\n",
6314                                           src_id, src_data);
6315                                 break;
6316                         }
6317
6318                         if (!(disp_int[crtc_idx] & mask)) {
6319                                 DRM_DEBUG("IH: D%d %s - IH event w/o asserted irq bit?\n",
6320                                           crtc_idx + 1, event_name);
6321                         }
6322
6323                         disp_int[crtc_idx] &= ~mask;
6324                         DRM_DEBUG("IH: D%d %s\n", crtc_idx + 1, event_name);
6325
6326                         break;
6327                 case 8: /* D1 page flip */
6328                 case 10: /* D2 page flip */
6329                 case 12: /* D3 page flip */
6330                 case 14: /* D4 page flip */
6331                 case 16: /* D5 page flip */
6332                 case 18: /* D6 page flip */
6333                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6334                         if (radeon_use_pflipirq > 0)
6335                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6336                         break;
6337                 case 42: /* HPD hotplug */
6338                         if (src_data <= 5) {
6339                                 hpd_idx = src_data;
6340                                 mask = DC_HPD1_INTERRUPT;
6341                                 queue_hotplug = true;
6342                                 event_name = "HPD";
6343
6344                         } else if (src_data <= 11) {
6345                                 hpd_idx = src_data - 6;
6346                                 mask = DC_HPD1_RX_INTERRUPT;
6347                                 queue_dp = true;
6348                                 event_name = "HPD_RX";
6349
6350                         } else {
6351                                 DRM_DEBUG("Unhandled interrupt: %d %d\n",
6352                                           src_id, src_data);
6353                                 break;
6354                         }
6355
6356                         if (!(disp_int[hpd_idx] & mask))
6357                                 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6358
6359                         disp_int[hpd_idx] &= ~mask;
6360                         DRM_DEBUG("IH: %s%d\n", event_name, hpd_idx + 1);
6361                         break;
6362                 case 96:
6363                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
6364                         WREG32(SRBM_INT_ACK, 0x1);
6365                         break;
6366                 case 124: /* UVD */
6367                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6368                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6369                         break;
6370                 case 146:
6371                 case 147:
6372                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6373                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6374                         /* reset addr and status */
6375                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6376                         if (addr == 0x0 && status == 0x0)
6377                                 break;
6378                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6379                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6380                                 addr);
6381                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6382                                 status);
6383                         si_vm_decode_fault(rdev, status, addr);
6384                         break;
6385                 case 176: /* RINGID0 CP_INT */
6386                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6387                         break;
6388                 case 177: /* RINGID1 CP_INT */
6389                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6390                         break;
6391                 case 178: /* RINGID2 CP_INT */
6392                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6393                         break;
6394                 case 181: /* CP EOP event */
6395                         DRM_DEBUG("IH: CP EOP\n");
6396                         switch (ring_id) {
6397                         case 0:
6398                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6399                                 break;
6400                         case 1:
6401                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6402                                 break;
6403                         case 2:
6404                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6405                                 break;
6406                         }
6407                         break;
6408                 case 224: /* DMA trap event */
6409                         DRM_DEBUG("IH: DMA trap\n");
6410                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6411                         break;
6412                 case 230: /* thermal low to high */
6413                         DRM_DEBUG("IH: thermal low to high\n");
6414                         rdev->pm.dpm.thermal.high_to_low = false;
6415                         queue_thermal = true;
6416                         break;
6417                 case 231: /* thermal high to low */
6418                         DRM_DEBUG("IH: thermal high to low\n");
6419                         rdev->pm.dpm.thermal.high_to_low = true;
6420                         queue_thermal = true;
6421                         break;
6422                 case 233: /* GUI IDLE */
6423                         DRM_DEBUG("IH: GUI idle\n");
6424                         break;
6425                 case 244: /* DMA trap event */
6426                         DRM_DEBUG("IH: DMA1 trap\n");
6427                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6428                         break;
6429                 default:
6430                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6431                         break;
6432                 }
6433
6434                 /* wptr/rptr are in bytes! */
6435                 rptr += 16;
6436                 rptr &= rdev->ih.ptr_mask;
6437                 WREG32(IH_RB_RPTR, rptr);
6438         }
6439         if (queue_dp)
6440                 schedule_work(&rdev->dp_work);
6441         if (queue_hotplug)
6442                 schedule_delayed_work(&rdev->hotplug_work, 0);
6443         if (queue_thermal && rdev->pm.dpm_enabled)
6444                 schedule_work(&rdev->pm.dpm.thermal.work);
6445         rdev->ih.rptr = rptr;
6446         atomic_set(&rdev->ih.lock, 0);
6447
6448         /* make sure wptr hasn't changed while processing */
6449         wptr = si_get_ih_wptr(rdev);
6450         if (wptr != rptr)
6451                 goto restart_ih;
6452
6453         return IRQ_HANDLED;
6454 }
6455
6456 /*
6457  * startup/shutdown callbacks
6458  */
6459 static void si_uvd_init(struct radeon_device *rdev)
6460 {
6461         int r;
6462
6463         if (!rdev->has_uvd)
6464                 return;
6465
6466         r = radeon_uvd_init(rdev);
6467         if (r) {
6468                 dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
6469                 /*
6470                  * At this point rdev->uvd.vcpu_bo is NULL which trickles down
6471                  * to early fails uvd_v2_2_resume() and thus nothing happens
6472                  * there. So it is pointless to try to go through that code
6473                  * hence why we disable uvd here.
6474                  */
6475                 rdev->has_uvd = 0;
6476                 return;
6477         }
6478         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
6479         r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
6480 }
6481
6482 static void si_uvd_start(struct radeon_device *rdev)
6483 {
6484         int r;
6485
6486         if (!rdev->has_uvd)
6487                 return;
6488
6489         r = uvd_v2_2_resume(rdev);
6490         if (r) {
6491                 dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
6492                 goto error;
6493         }
6494         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
6495         if (r) {
6496                 dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
6497                 goto error;
6498         }
6499         return;
6500
6501 error:
6502         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6503 }
6504
6505 static void si_uvd_resume(struct radeon_device *rdev)
6506 {
6507         struct radeon_ring *ring;
6508         int r;
6509
6510         if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
6511                 return;
6512
6513         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6514         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
6515         if (r) {
6516                 dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
6517                 return;
6518         }
6519         r = uvd_v1_0_init(rdev);
6520         if (r) {
6521                 dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
6522                 return;
6523         }
6524 }
6525
6526 static void si_vce_init(struct radeon_device *rdev)
6527 {
6528         int r;
6529
6530         if (!rdev->has_vce)
6531                 return;
6532
6533         r = radeon_vce_init(rdev);
6534         if (r) {
6535                 dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
6536                 /*
6537                  * At this point rdev->vce.vcpu_bo is NULL which trickles down
6538                  * to early fails si_vce_start() and thus nothing happens
6539                  * there. So it is pointless to try to go through that code
6540                  * hence why we disable vce here.
6541                  */
6542                 rdev->has_vce = 0;
6543                 return;
6544         }
6545         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
6546         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
6547         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
6548         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
6549 }
6550
6551 static void si_vce_start(struct radeon_device *rdev)
6552 {
6553         int r;
6554
6555         if (!rdev->has_vce)
6556                 return;
6557
6558         r = radeon_vce_resume(rdev);
6559         if (r) {
6560                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6561                 goto error;
6562         }
6563         r = vce_v1_0_resume(rdev);
6564         if (r) {
6565                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6566                 goto error;
6567         }
6568         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
6569         if (r) {
6570                 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
6571                 goto error;
6572         }
6573         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
6574         if (r) {
6575                 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
6576                 goto error;
6577         }
6578         return;
6579
6580 error:
6581         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
6582         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
6583 }
6584
6585 static void si_vce_resume(struct radeon_device *rdev)
6586 {
6587         struct radeon_ring *ring;
6588         int r;
6589
6590         if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
6591                 return;
6592
6593         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
6594         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6595         if (r) {
6596                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6597                 return;
6598         }
6599         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
6600         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6601         if (r) {
6602                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6603                 return;
6604         }
6605         r = vce_v1_0_init(rdev);
6606         if (r) {
6607                 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
6608                 return;
6609         }
6610 }
6611
6612 static int si_startup(struct radeon_device *rdev)
6613 {
6614         struct radeon_ring *ring;
6615         int r;
6616
6617         /* enable pcie gen2/3 link */
6618         si_pcie_gen3_enable(rdev);
6619         /* enable aspm */
6620         si_program_aspm(rdev);
6621
6622         /* scratch needs to be initialized before MC */
6623         r = r600_vram_scratch_init(rdev);
6624         if (r)
6625                 return r;
6626
6627         si_mc_program(rdev);
6628
6629         if (!rdev->pm.dpm_enabled) {
6630                 r = si_mc_load_microcode(rdev);
6631                 if (r) {
6632                         DRM_ERROR("Failed to load MC firmware!\n");
6633                         return r;
6634                 }
6635         }
6636
6637         r = si_pcie_gart_enable(rdev);
6638         if (r)
6639                 return r;
6640         si_gpu_init(rdev);
6641
6642         /* allocate rlc buffers */
6643         if (rdev->family == CHIP_VERDE) {
6644                 rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
6645                 rdev->rlc.reg_list_size =
6646                         (u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
6647         }
6648         rdev->rlc.cs_data = si_cs_data;
6649         r = sumo_rlc_init(rdev);
6650         if (r) {
6651                 DRM_ERROR("Failed to init rlc BOs!\n");
6652                 return r;
6653         }
6654
6655         /* allocate wb buffer */
6656         r = radeon_wb_init(rdev);
6657         if (r)
6658                 return r;
6659
6660         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6661         if (r) {
6662                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6663                 return r;
6664         }
6665
6666         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6667         if (r) {
6668                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6669                 return r;
6670         }
6671
6672         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6673         if (r) {
6674                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6675                 return r;
6676         }
6677
6678         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6679         if (r) {
6680                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6681                 return r;
6682         }
6683
6684         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6685         if (r) {
6686                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6687                 return r;
6688         }
6689
6690         si_uvd_start(rdev);
6691         si_vce_start(rdev);
6692
6693         /* Enable IRQ */
6694         if (!rdev->irq.installed) {
6695                 r = radeon_irq_kms_init(rdev);
6696                 if (r)
6697                         return r;
6698         }
6699
6700         r = si_irq_init(rdev);
6701         if (r) {
6702                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6703                 radeon_irq_kms_fini(rdev);
6704                 return r;
6705         }
6706         si_irq_set(rdev);
6707
6708         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6709         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6710                              RADEON_CP_PACKET2);
6711         if (r)
6712                 return r;
6713
6714         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6715         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6716                              RADEON_CP_PACKET2);
6717         if (r)
6718                 return r;
6719
6720         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6721         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6722                              RADEON_CP_PACKET2);
6723         if (r)
6724                 return r;
6725
6726         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6727         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6728                              DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6729         if (r)
6730                 return r;
6731
6732         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6733         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6734                              DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6735         if (r)
6736                 return r;
6737
6738         r = si_cp_load_microcode(rdev);
6739         if (r)
6740                 return r;
6741         r = si_cp_resume(rdev);
6742         if (r)
6743                 return r;
6744
6745         r = cayman_dma_resume(rdev);
6746         if (r)
6747                 return r;
6748
6749         si_uvd_resume(rdev);
6750         si_vce_resume(rdev);
6751
6752         r = radeon_ib_pool_init(rdev);
6753         if (r) {
6754                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6755                 return r;
6756         }
6757
6758         r = radeon_vm_manager_init(rdev);
6759         if (r) {
6760                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6761                 return r;
6762         }
6763
6764         r = radeon_audio_init(rdev);
6765         if (r)
6766                 return r;
6767
6768         return 0;
6769 }
6770
6771 int si_resume(struct radeon_device *rdev)
6772 {
6773         int r;
6774
6775         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6776          * posting will perform necessary task to bring back GPU into good
6777          * shape.
6778          */
6779         /* post card */
6780         atom_asic_init(rdev->mode_info.atom_context);
6781
6782         /* init golden registers */
6783         si_init_golden_registers(rdev);
6784
6785         if (rdev->pm.pm_method == PM_METHOD_DPM)
6786                 radeon_pm_resume(rdev);
6787
6788         rdev->accel_working = true;
6789         r = si_startup(rdev);
6790         if (r) {
6791                 DRM_ERROR("si startup failed on resume\n");
6792                 rdev->accel_working = false;
6793                 return r;
6794         }
6795
6796         return r;
6797
6798 }
6799
/**
 * si_suspend - asic specific suspend sequence (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Calls, in this order: radeon_pm_suspend(), radeon_audio_fini(),
 * radeon_vm_manager_fini(), si_cp_enable(rdev, false) and
 * cayman_dma_stop(); then the UVD and VCE suspend helpers when the
 * corresponding has_uvd / has_vce flag is set; and finally
 * si_fini_pg(), si_fini_cg(), si_irq_suspend(), radeon_wb_disable()
 * and si_pcie_gart_disable().
 * None of the helper results are checked; always returns 0.
 */
6800 int si_suspend(struct radeon_device *rdev)
6801 {
6802         radeon_pm_suspend(rdev);
6803         radeon_audio_fini(rdev);
6804         radeon_vm_manager_fini(rdev);
             /* second argument false: request the CP to be disabled */
6805         si_cp_enable(rdev, false);
6806         cayman_dma_stop(rdev);
             /* UVD and VCE are optional blocks, only touched when present */
6807         if (rdev->has_uvd) {
6808                 uvd_v1_0_fini(rdev);
6809                 radeon_uvd_suspend(rdev);
6810         }
6811         if (rdev->has_vce)
6812                 radeon_vce_suspend(rdev);
6813         si_fini_pg(rdev);
6814         si_fini_cg(rdev);
6815         si_irq_suspend(rdev);
6816         radeon_wb_disable(rdev);
6817         si_pcie_gart_disable(rdev);
6818         return 0;
6819 }
6820
6821 /* Plan is to move initialization in that function and use
6822  * helper function so that radeon_device_init pretty much
6823  * do nothing more than calling asic specific function. This
6824  * should also allow to remove a bunch of callback function
6825  * like vram_info.
6826  */
6827 int si_init(struct radeon_device *rdev)
6828 {
6829         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6830         int r;
6831
6832         /* Read BIOS */
6833         if (!radeon_get_bios(rdev)) {
6834                 if (ASIC_IS_AVIVO(rdev))
6835                         return -EINVAL;
6836         }
6837         /* Must be an ATOMBIOS */
6838         if (!rdev->is_atom_bios) {
6839                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6840                 return -EINVAL;
6841         }
6842         r = radeon_atombios_init(rdev);
6843         if (r)
6844                 return r;
6845
6846         /* Post card if necessary */
6847         if (!radeon_card_posted(rdev)) {
6848                 if (!rdev->bios) {
6849                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6850                         return -EINVAL;
6851                 }
6852                 DRM_INFO("GPU not posted. posting now...\n");
6853                 atom_asic_init(rdev->mode_info.atom_context);
6854         }
6855         /* init golden registers */
6856         si_init_golden_registers(rdev);
6857         /* Initialize scratch registers */
6858         si_scratch_init(rdev);
6859         /* Initialize surface registers */
6860         radeon_surface_init(rdev);
6861         /* Initialize clocks */
6862         radeon_get_clock_info(rdev->ddev);
6863
6864         /* Fence driver */
6865         r = radeon_fence_driver_init(rdev);
6866         if (r)
6867                 return r;
6868
6869         /* initialize memory controller */
6870         r = si_mc_init(rdev);
6871         if (r)
6872                 return r;
6873         /* Memory manager */
6874         r = radeon_bo_init(rdev);
6875         if (r)
6876                 return r;
6877
6878         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6879             !rdev->rlc_fw || !rdev->mc_fw) {
6880                 r = si_init_microcode(rdev);
6881                 if (r) {
6882                         DRM_ERROR("Failed to load firmware!\n");
6883                         return r;
6884                 }
6885         }
6886
6887         /* Initialize power management */
6888         radeon_pm_init(rdev);
6889
6890         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6891         ring->ring_obj = NULL;
6892         r600_ring_init(rdev, ring, 1024 * 1024);
6893
6894         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6895         ring->ring_obj = NULL;
6896         r600_ring_init(rdev, ring, 1024 * 1024);
6897
6898         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6899         ring->ring_obj = NULL;
6900         r600_ring_init(rdev, ring, 1024 * 1024);
6901
6902         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6903         ring->ring_obj = NULL;
6904         r600_ring_init(rdev, ring, 64 * 1024);
6905
6906         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6907         ring->ring_obj = NULL;
6908         r600_ring_init(rdev, ring, 64 * 1024);
6909
6910         si_uvd_init(rdev);
6911         si_vce_init(rdev);
6912
6913         rdev->ih.ring_obj = NULL;
6914         r600_ih_ring_init(rdev, 64 * 1024);
6915
6916         r = r600_pcie_gart_init(rdev);
6917         if (r)
6918                 return r;
6919
6920         rdev->accel_working = true;
6921         r = si_startup(rdev);
6922         if (r) {
6923                 dev_err(rdev->dev, "disabling GPU acceleration\n");
6924                 si_cp_fini(rdev);
6925                 cayman_dma_fini(rdev);
6926                 si_irq_fini(rdev);
6927                 sumo_rlc_fini(rdev);
6928                 radeon_wb_fini(rdev);
6929                 radeon_ib_pool_fini(rdev);
6930                 radeon_vm_manager_fini(rdev);
6931                 radeon_irq_kms_fini(rdev);
6932                 si_pcie_gart_fini(rdev);
6933                 rdev->accel_working = false;
6934         }
6935
6936         /* Don't start up if the MC ucode is missing.
6937          * The default clocks and voltages before the MC ucode
6938          * is loaded are not suffient for advanced operations.
6939          */
6940         if (!rdev->mc_fw) {
6941                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6942                 return -EINVAL;
6943         }
6944
6945         return 0;
6946 }
6947
/**
 * si_fini - asic specific driver and hw teardown (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Calls the fini helper of every piece visible below (power
 * management, CP, DMA, power/clock gating, interrupts, RLC, writeback,
 * VM manager, IB pool, UVD/VCE when present, GART, VRAM scratch, GEM,
 * fence driver, buffer manager, ATOM BIOS), then frees rdev->bios and
 * clears the pointer.
 */
6948 void si_fini(struct radeon_device *rdev)
6949 {
6950         radeon_pm_fini(rdev);
6951         si_cp_fini(rdev);
6952         cayman_dma_fini(rdev);
6953         si_fini_pg(rdev);
6954         si_fini_cg(rdev);
6955         si_irq_fini(rdev);
6956         sumo_rlc_fini(rdev);
6957         radeon_wb_fini(rdev);
6958         radeon_vm_manager_fini(rdev);
6959         radeon_ib_pool_fini(rdev);
6960         radeon_irq_kms_fini(rdev);
             /* UVD and VCE are optional blocks, only torn down when present */
6961         if (rdev->has_uvd) {
6962                 uvd_v1_0_fini(rdev);
6963                 radeon_uvd_fini(rdev);
6964         }
6965         if (rdev->has_vce)
6966                 radeon_vce_fini(rdev);
6967         si_pcie_gart_fini(rdev);
6968         r600_vram_scratch_fini(rdev);
6969         radeon_gem_fini(rdev);
6970         radeon_fence_driver_fini(rdev);
6971         radeon_bo_fini(rdev);
6972         radeon_atombios_fini(rdev);
             /* release the BIOS image and clear the now stale pointer */
6973         kfree(rdev->bios);
6974         rdev->bios = NULL;
6975 }
6976
6977 /**
6978  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6979  *
6980  * @rdev: radeon_device pointer
6981  *
6982  * Fetches a GPU clock counter snapshot (SI).
6983  * Returns the 64 bit clock counter snapshot.
6984  */
6985 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6986 {
6987         uint64_t clock;
6988
6989         mutex_lock(&rdev->gpu_clock_mutex);
6990         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6991         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6992                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6993         mutex_unlock(&rdev->gpu_clock_mutex);
6994         return clock;
6995 }
6996
/**
 * si_set_uvd_clocks - program the UPLL for the requested UVD clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested VCLK; 0 leaves the PLL in bypass mode
 * @dclk: requested DCLK; 0 leaves the PLL in bypass mode
 *
 * Switches VCLK/DCLK to bclk and puts the UPLL into bypass mode.  When
 * both clocks are non-zero, the dividers are computed with
 * radeon_uvd_calc_upll_dividers(), the PLL is reprogrammed and brought
 * out of bypass, and VCLK/DCLK are switched to their new source.
 *
 * Returns 0 on success, or the error returned by
 * radeon_uvd_calc_upll_dividers() / radeon_uvd_send_upll_ctlreq().
 * The error paths return immediately and do not undo the register
 * writes already performed.
 */
6997 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6998 {
6999         unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
7000         int r;
7001
7002         /* bypass vclk and dclk with bclk */
7003         WREG32_P(CG_UPLL_FUNC_CNTL_2,
7004                 VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
7005                 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7006
7007         /* put PLL in bypass mode */
7008         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
7009
7010         if (!vclk || !dclk) {
7011                 /* keep the Bypass mode */
7012                 return 0;
7013         }
7014
             /* results land in fb_div, vclk_div and dclk_div */
7015         r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
7016                                           16384, 0x03FFFFFF, 0, 128, 5,
7017                                           &fb_div, &vclk_div, &dclk_div);
7018         if (r)
7019                 return r;
7020
7021         /* set RESET_ANTI_MUX to 0 */
7022         WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7023
7024         /* set VCO_MODE to 1 */
7025         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
7026
7027         /* disable sleep mode */
7028         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
7029
7030         /* deassert UPLL_RESET */
7031         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7032
7033         mdelay(1);
7034
7035         r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7036         if (r)
7037                 return r;
7038
7039         /* assert UPLL_RESET again */
7040         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
7041
7042         /* disable spread spectrum. */
7043         WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7044
7045         /* set feedback divider */
7046         WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
7047
7048         /* set ref divider to 0 */
7049         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
7050
             /* UPLL_SPARE_ISPARE9 is set only for fb_div >= 307200 */
7051         if (fb_div < 307200)
7052                 WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
7053         else
7054                 WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
7055
7056         /* set PDIV_A and PDIV_B */
7057         WREG32_P(CG_UPLL_FUNC_CNTL_2,
7058                 UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
7059                 ~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
7060
7061         /* give the PLL some time to settle */
7062         mdelay(15);
7063
7064         /* deassert PLL_RESET */
7065         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7066
7067         mdelay(15);
7068
7069         /* switch from bypass mode to normal mode */
7070         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
7071
7072         r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7073         if (r)
7074                 return r;
7075
7076         /* switch VCLK and DCLK selection */
7077         WREG32_P(CG_UPLL_FUNC_CNTL_2,
7078                 VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
7079                 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7080
7081         mdelay(100);
7082
7083         return 0;
7084 }
7085
/**
 * si_pcie_gen3_enable - try to raise the PCIE link speed (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Queries the speed capability of the upstream bridge
 * (rdev->pdev->bus->self) with pcie_get_speed_cap().  When it is
 * 5.0 or 8.0 GT/s and the current data rate read from
 * PCIE_LC_SPEED_CNTL does not already match, a software initiated
 * speed change is requested through PCIE_LC_SPEED_CNTL and the target
 * link speed field of PCI_EXP_LNKCTL2.  For 8.0 GT/s the equalization
 * sequence is re-run up to 10 times first.
 *
 * Returns without touching the link when the device sits on a root
 * bus, radeon_pcie_gen2 is 0, the asic is an IGP or not PCIE, the
 * bridge speed capability is unknown or below 5.0 GT/s, or either end
 * of the link is not a PCIe device.
 */
7086 static void si_pcie_gen3_enable(struct radeon_device *rdev)
7087 {
             /* only the pointer is loaded here; it is first used after the root bus check */
7088         struct pci_dev *root = rdev->pdev->bus->self;
7089         enum pci_bus_speed speed_cap;
7090         u32 speed_cntl, current_data_rate;
7091         int i;
7092         u16 tmp16;
7093
7094         if (pci_is_root_bus(rdev->pdev->bus))
7095                 return;
7096
7097         if (radeon_pcie_gen2 == 0)
7098                 return;
7099
7100         if (rdev->flags & RADEON_IS_IGP)
7101                 return;
7102
7103         if (!(rdev->flags & RADEON_IS_PCIE))
7104                 return;
7105
7106         speed_cap = pcie_get_speed_cap(root);
7107         if (speed_cap == PCI_SPEED_UNKNOWN)
7108                 return;
7109
7110         if ((speed_cap != PCIE_SPEED_8_0GT) &&
7111             (speed_cap != PCIE_SPEED_5_0GT))
7112                 return;
7113
             /* current_data_rate: 1 is treated as gen 2, 2 as gen 3 below */
7114         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7115         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7116                 LC_CURRENT_DATA_RATE_SHIFT;
7117         if (speed_cap == PCIE_SPEED_8_0GT) {
7118                 if (current_data_rate == 2) {
7119                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7120                         return;
7121                 }
7122                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7123         } else if (speed_cap == PCIE_SPEED_5_0GT) {
7124                 if (current_data_rate == 1) {
7125                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7126                         return;
7127                 }
7128                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7129         }
7130
7131         if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
7132                 return;
7133
7134         if (speed_cap == PCIE_SPEED_8_0GT) {
7135                 /* re-try equalization if gen3 is not already enabled */
7136                 if (current_data_rate != 2) {
7137                         u16 bridge_cfg, gpu_cfg;
7138                         u16 bridge_cfg2, gpu_cfg2;
7139                         u32 max_lw, current_lw, tmp;
7140
7141                         pcie_capability_read_word(root, PCI_EXP_LNKCTL,
7142                                                   &bridge_cfg);
7143                         pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL,
7144                                                   &gpu_cfg);
7145
                             /* set PCI_EXP_LNKCTL_HAWD on both ends of the link */
7146                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7147                         pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
7148
7149                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7150                         pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL,
7151                                                    tmp16);
7152
7153                         tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7154                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7155                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7156
                             /* operating width below detected width: request renegotiation to max_lw */
7157                         if (current_lw < max_lw) {
7158                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7159                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
7160                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7161                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7162                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7163                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7164                                 }
7165                         }
7166
7167                         for (i = 0; i < 10; i++) {
7168                                 /* check status */
7169                                 pcie_capability_read_word(rdev->pdev,
7170                                                           PCI_EXP_DEVSTA,
7171                                                           &tmp16);
                                     /* give up on equalization while PCI_EXP_DEVSTA_TRPND is set */
7172                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7173                                         break;
7174
                                     /*
                                      * NOTE(review): bridge_cfg/gpu_cfg are re-read here, i.e.
                                      * after HAWD was set above, so the "restore" of the HAWD
                                      * bit further down writes back the value seen in this
                                      * iteration, not the one from before the loop - confirm
                                      * this is intended.
                                      */
7175                                 pcie_capability_read_word(root, PCI_EXP_LNKCTL,
7176                                                           &bridge_cfg);
7177                                 pcie_capability_read_word(rdev->pdev,
7178                                                           PCI_EXP_LNKCTL,
7179                                                           &gpu_cfg);
7180
7181                                 pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
7182                                                           &bridge_cfg2);
7183                                 pcie_capability_read_word(rdev->pdev,
7184                                                           PCI_EXP_LNKCTL2,
7185                                                           &gpu_cfg2);
7186
                                     /* set LC_SET_QUIESCE, then LC_REDO_EQ, and wait 100 ms */
7187                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7188                                 tmp |= LC_SET_QUIESCE;
7189                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7190
7191                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7192                                 tmp |= LC_REDO_EQ;
7193                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7194
7195                                 msleep(100);
7196
7197                                 /* linkctl */
7198                                 pcie_capability_read_word(root, PCI_EXP_LNKCTL,
7199                                                           &tmp16);
7200                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7201                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7202                                 pcie_capability_write_word(root,
7203                                                            PCI_EXP_LNKCTL,
7204                                                            tmp16);
7205
7206                                 pcie_capability_read_word(rdev->pdev,
7207                                                           PCI_EXP_LNKCTL,
7208                                                           &tmp16);
7209                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7210                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7211                                 pcie_capability_write_word(rdev->pdev,
7212                                                            PCI_EXP_LNKCTL,
7213                                                            tmp16);
7214
7215                                 /* linkctl2 */
7216                                 pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
7217                                                           &tmp16);
7218                                 tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
7219                                            PCI_EXP_LNKCTL2_TX_MARGIN);
7220                                 tmp16 |= (bridge_cfg2 &
7221                                           (PCI_EXP_LNKCTL2_ENTER_COMP |
7222                                            PCI_EXP_LNKCTL2_TX_MARGIN));
7223                                 pcie_capability_write_word(root,
7224                                                            PCI_EXP_LNKCTL2,
7225                                                            tmp16);
7226
7227                                 pcie_capability_read_word(rdev->pdev,
7228                                                           PCI_EXP_LNKCTL2,
7229                                                           &tmp16);
7230                                 tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
7231                                            PCI_EXP_LNKCTL2_TX_MARGIN);
7232                                 tmp16 |= (gpu_cfg2 &
7233                                           (PCI_EXP_LNKCTL2_ENTER_COMP |
7234                                            PCI_EXP_LNKCTL2_TX_MARGIN));
7235                                 pcie_capability_write_word(rdev->pdev,
7236                                                            PCI_EXP_LNKCTL2,
7237                                                            tmp16);
7238
                                     /* clear LC_SET_QUIESCE again before the next attempt */
7239                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7240                                 tmp &= ~LC_SET_QUIESCE;
7241                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7242                         }
7243                 }
7244         }
7245
7246         /* set the link speed */
             /* speed_cntl still holds the value read before the equalization retries */
7247         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7248         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7249         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7250
7251         pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL2, &tmp16);
7252         tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
7253         if (speed_cap == PCIE_SPEED_8_0GT)
7254                 tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
7255         else if (speed_cap == PCIE_SPEED_5_0GT)
7256                 tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
7257         else
7258                 tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
7259         pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL2, tmp16);
7260
7261         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7262         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7263         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7264
             /*
              * Poll until LC_INITIATE_LINK_SPEED_CHANGE reads back as 0, at most
              * rdev->usec_timeout times with a 1 us delay; running out of
              * iterations is not reported.
              */
7265         for (i = 0; i < rdev->usec_timeout; i++) {
7266                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7267                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7268                         break;
7269                 udelay(1);
7270         }
7271 }
7272
/**
 * si_program_aspm - program the ASPM related link registers (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Returns immediately when radeon_aspm is 0 or the device is not PCIE.
 * Otherwise programs, in order: the N_FTS override, LC_GO_TO_RECOVERY,
 * P_IGNORE_EDB_ERR, the L0s/L1 inactivity fields of PCIE_LC_CNTL, the
 * PIF PLL power state fields, the dynamic lane power state, the LS2
 * exit time and, when the upstream bridge advertises
 * PCI_EXP_LNKCAP_CLKPM, a set of clock selection registers.  It ends
 * with the memory light sleep enables in PCIE_CNTL2 and a conditional
 * clearing of the L0s inactivity field.
 *
 * Nearly every register follows the same read/modify/compare pattern
 * and is only written back when the new value differs from the one
 * that was read.
 *
 * The disable_* locals are initialised to false and never changed in
 * this function, so every "!disable_*" test below is true and the
 * "else" branch of the "!disable_l1" test is never taken.
 */
7273 static void si_program_aspm(struct radeon_device *rdev)
7274 {
7275         u32 data, orig;
7276         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7277         bool disable_clkreq = false;
7278
7279         if (radeon_aspm == 0)
7280                 return;
7281
7282         if (!(rdev->flags & RADEON_IS_PCIE))
7283                 return;
7284
7285         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7286         data &= ~LC_XMIT_N_FTS_MASK;
7287         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7288         if (orig != data)
7289                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7290
7291         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7292         data |= LC_GO_TO_RECOVERY;
7293         if (orig != data)
7294                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7295
7296         orig = data = RREG32_PCIE(PCIE_P_CNTL);
7297         data |= P_IGNORE_EDB_ERR;
7298         if (orig != data)
7299                 WREG32_PCIE(PCIE_P_CNTL, data);
7300
             /*
              * Start from both inactivity fields cleared and LC_PMI_TO_L1_DIS
              * set, then re-enable L0s and L1 according to the disable_* flags.
              */
7301         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7302         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7303         data |= LC_PMI_TO_L1_DIS;
7304         if (!disable_l0s)
7305                 data |= LC_L0S_INACTIVITY(7);
7306
7307         if (!disable_l1) {
7308                 data |= LC_L1_INACTIVITY(7);
7309                 data &= ~LC_PMI_TO_L1_DIS;
7310                 if (orig != data)
7311                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7312
7313                 if (!disable_plloff_in_l1) {
7314                         bool clk_req_support;
7315
                             /* same PLL power state values (7) for PWRDOWN_0/1 on PHY0 and PHY1 */
7316                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7317                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7318                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7319                         if (orig != data)
7320                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7321
7322                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7323                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7324                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7325                         if (orig != data)
7326                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7327
7328                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7329                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7330                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7331                         if (orig != data)
7332                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7333
7334                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7335                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7336                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7337                         if (orig != data)
7338                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7339
                             /* every family except Oland and Hainan: clear the PLL ramp up time fields */
7340                         if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7341                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7342                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7343                                 if (orig != data)
7344                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7345
7346                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7347                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7348                                 if (orig != data)
7349                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7350
7351                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7352                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7353                                 if (orig != data)
7354                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7355
7356                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7357                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7358                                 if (orig != data)
7359                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7360
7361                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7362                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7363                                 if (orig != data)
7364                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7365
7366                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7367                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7368                                 if (orig != data)
7369                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7370
7371                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7372                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7373                                 if (orig != data)
7374                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7375
7376                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7377                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7378                                 if (orig != data)
7379                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7380                         }
7381                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7382                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7383                         data |= LC_DYN_LANES_PWR_STATE(3);
7384                         if (orig != data)
7385                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7386
                             /* LS2 exit time: 5 on Oland/Hainan, field left cleared on other families */
7387                         orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7388                         data &= ~LS2_EXIT_TIME_MASK;
7389                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7390                                 data |= LS2_EXIT_TIME(5);
7391                         if (orig != data)
7392                                 WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7393
7394                         orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7395                         data &= ~LS2_EXIT_TIME_MASK;
7396                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7397                                 data |= LS2_EXIT_TIME(5);
7398                         if (orig != data)
7399                                 WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7400
                             /*
                              * clk_req_support is true only when the device is not on a
                              * root bus and the upstream bridge reports
                              * PCI_EXP_LNKCAP_CLKPM in its link capabilities.
                              */
7401                         if (!disable_clkreq &&
7402                             !pci_is_root_bus(rdev->pdev->bus)) {
7403                                 struct pci_dev *root = rdev->pdev->bus->self;
7404                                 u32 lnkcap;
7405
7406                                 clk_req_support = false;
7407                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7408                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7409                                         clk_req_support = true;
7410                         } else {
7411                                 clk_req_support = false;
7412                         }
7413
7414                         if (clk_req_support) {
7415                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7416                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7417                                 if (orig != data)
7418                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7419
7420                                 orig = data = RREG32(THM_CLK_CNTL);
7421                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7422                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7423                                 if (orig != data)
7424                                         WREG32(THM_CLK_CNTL, data);
7425
7426                                 orig = data = RREG32(MISC_CLK_CNTL);
7427                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7428                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7429                                 if (orig != data)
7430                                         WREG32(MISC_CLK_CNTL, data);
7431
7432                                 orig = data = RREG32(CG_CLKPIN_CNTL);
7433                                 data &= ~BCLK_AS_XCLK;
7434                                 if (orig != data)
7435                                         WREG32(CG_CLKPIN_CNTL, data);
7436
7437                                 orig = data = RREG32(CG_CLKPIN_CNTL_2);
7438                                 data &= ~FORCE_BIF_REFCLK_EN;
7439                                 if (orig != data)
7440                                         WREG32(CG_CLKPIN_CNTL_2, data);
7441
7442                                 orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7443                                 data &= ~MPLL_CLKOUT_SEL_MASK;
7444                                 data |= MPLL_CLKOUT_SEL(4);
7445                                 if (orig != data)
7446                                         WREG32(MPLL_BYPASSCLK_SEL, data);
7447
7448                                 orig = data = RREG32(SPLL_CNTL_MODE);
7449                                 data &= ~SPLL_REFCLK_SEL_MASK;
7450                                 if (orig != data)
7451                                         WREG32(SPLL_CNTL_MODE, data);
7452                         }
7453                 }
7454         } else {
                     /* L1 disabled: write PCIE_LC_CNTL without the L1 inactivity value */
7455                 if (orig != data)
7456                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7457         }
7458
7459         orig = data = RREG32_PCIE(PCIE_CNTL2);
7460         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7461         if (orig != data)
7462                 WREG32_PCIE(PCIE_CNTL2, data);
7463
             /*
              * Clear the L0s inactivity field again when every LC_N_FTS bit is
              * set and PCIE_LC_STATUS1 reports both LC_REVERSE_XMIT and
              * LC_REVERSE_RCVR.
              */
7464         if (!disable_l0s) {
7465                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7466                 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7467                         data = RREG32_PCIE(PCIE_LC_STATUS1);
7468                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7469                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7470                                 data &= ~LC_L0S_INACTIVITY_MASK;
7471                                 if (orig != data)
7472                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7473                         }
7474                 }
7475         }
7476 }
7477
7478 static int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7479 {
7480         unsigned i;
7481
7482         /* make sure VCEPLL_CTLREQ is deasserted */
7483         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7484
7485         mdelay(10);
7486
7487         /* assert UPLL_CTLREQ */
7488         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
7489
7490         /* wait for CTLACK and CTLACK2 to get asserted */
7491         for (i = 0; i < 100; ++i) {
7492                 uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
7493                 if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
7494                         break;
7495                 mdelay(10);
7496         }
7497
7498         /* deassert UPLL_CTLREQ */
7499         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7500
7501         if (i == 100) {
7502                 DRM_ERROR("Timeout setting UVD clocks!\n");
7503                 return -ETIMEDOUT;
7504         }
7505
7506         return 0;
7507 }
7508
7509 int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
7510 {
7511         unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
7512         int r;
7513
7514         /* bypass evclk and ecclk with bclk */
7515         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7516                      EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
7517                      ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7518
7519         /* put PLL in bypass mode */
7520         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
7521                      ~VCEPLL_BYPASS_EN_MASK);
7522
7523         if (!evclk || !ecclk) {
7524                 /* keep the Bypass mode, put PLL to sleep */
7525                 WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7526                              ~VCEPLL_SLEEP_MASK);
7527                 return 0;
7528         }
7529
7530         r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
7531                                           16384, 0x03FFFFFF, 0, 128, 5,
7532                                           &fb_div, &evclk_div, &ecclk_div);
7533         if (r)
7534                 return r;
7535
7536         /* set RESET_ANTI_MUX to 0 */
7537         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7538
7539         /* set VCO_MODE to 1 */
7540         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
7541                      ~VCEPLL_VCO_MODE_MASK);
7542
7543         /* toggle VCEPLL_SLEEP to 1 then back to 0 */
7544         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7545                      ~VCEPLL_SLEEP_MASK);
7546         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);
7547
7548         /* deassert VCEPLL_RESET */
7549         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7550
7551         mdelay(1);
7552
7553         r = si_vce_send_vcepll_ctlreq(rdev);
7554         if (r)
7555                 return r;
7556
7557         /* assert VCEPLL_RESET again */
7558         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);
7559
7560         /* disable spread spectrum. */
7561         WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7562
7563         /* set feedback divider */
7564         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);
7565
7566         /* set ref divider to 0 */
7567         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);
7568
7569         /* set PDIV_A and PDIV_B */
7570         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7571                      VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
7572                      ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));
7573
7574         /* give the PLL some time to settle */
7575         mdelay(15);
7576
7577         /* deassert PLL_RESET */
7578         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7579
7580         mdelay(15);
7581
7582         /* switch from bypass mode to normal mode */
7583         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);
7584
7585         r = si_vce_send_vcepll_ctlreq(rdev);
7586         if (r)
7587                 return r;
7588
7589         /* switch VCLK and DCLK selection */
7590         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7591                      EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
7592                      ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7593
7594         mdelay(100);
7595
7596         return 0;
7597 }