Merge tag 'arc-4.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc
[sfrench/cifs-2.6.git] / drivers / gpu / drm / radeon / si.c
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include <drm/radeon_drm.h>
32 #include "sid.h"
33 #include "atom.h"
34 #include "si_blit_shaders.h"
35 #include "clearstate_si.h"
36 #include "radeon_ucode.h"
37
38
/*
 * Firmware images announced for this module through MODULE_FIRMWARE().
 *
 * For each of TAHITI, PITCAIRN, VERDE, OLAND and HAINAN two naming schemes
 * are listed: an upper-case set (pfp, me, ce, mc, mc2, rlc, smc) and a
 * lower-case set (pfp, me, ce, mc, rlc, smc).  The lower-case pitcairn,
 * verde, oland and hainan sets additionally list a "<chip>_k_smc" image;
 * "banks_k_2_smc" and "si58_mc" are listed on their own.
 *
 * Which image is actually requested for a given board is decided elsewhere
 * (not visible in this part of the file).
 */
39 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
40 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
41 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
42 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
43 MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
44 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
45 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
46
47 MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
48 MODULE_FIRMWARE("radeon/tahiti_me.bin");
49 MODULE_FIRMWARE("radeon/tahiti_ce.bin");
50 MODULE_FIRMWARE("radeon/tahiti_mc.bin");
51 MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
52 MODULE_FIRMWARE("radeon/tahiti_smc.bin");
53
54 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
55 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
56 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
57 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
58 MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
59 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
60 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
61
62 MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
63 MODULE_FIRMWARE("radeon/pitcairn_me.bin");
64 MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
65 MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
66 MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
67 MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
68 MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");
69
70 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
71 MODULE_FIRMWARE("radeon/VERDE_me.bin");
72 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
73 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
74 MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
75 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
76 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
77
78 MODULE_FIRMWARE("radeon/verde_pfp.bin");
79 MODULE_FIRMWARE("radeon/verde_me.bin");
80 MODULE_FIRMWARE("radeon/verde_ce.bin");
81 MODULE_FIRMWARE("radeon/verde_mc.bin");
82 MODULE_FIRMWARE("radeon/verde_rlc.bin");
83 MODULE_FIRMWARE("radeon/verde_smc.bin");
84 MODULE_FIRMWARE("radeon/verde_k_smc.bin");
85
86 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
87 MODULE_FIRMWARE("radeon/OLAND_me.bin");
88 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
89 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
90 MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
91 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
92 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
93
94 MODULE_FIRMWARE("radeon/oland_pfp.bin");
95 MODULE_FIRMWARE("radeon/oland_me.bin");
96 MODULE_FIRMWARE("radeon/oland_ce.bin");
97 MODULE_FIRMWARE("radeon/oland_mc.bin");
98 MODULE_FIRMWARE("radeon/oland_rlc.bin");
99 MODULE_FIRMWARE("radeon/oland_smc.bin");
100 MODULE_FIRMWARE("radeon/oland_k_smc.bin");
101
102 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
103 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
104 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
105 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
106 MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
107 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
108 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
109
110 MODULE_FIRMWARE("radeon/hainan_pfp.bin");
111 MODULE_FIRMWARE("radeon/hainan_me.bin");
112 MODULE_FIRMWARE("radeon/hainan_ce.bin");
113 MODULE_FIRMWARE("radeon/hainan_mc.bin");
114 MODULE_FIRMWARE("radeon/hainan_rlc.bin");
115 MODULE_FIRMWARE("radeon/hainan_smc.bin");
116 MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
117 MODULE_FIRMWARE("radeon/banks_k_2_smc.bin");
118
119 MODULE_FIRMWARE("radeon/si58_mc.bin");
120
/*
 * Prototypes needed before first use.
 *
 * The static ones are file-local helpers; their definitions are not in
 * view here (presumably further down in this file).
 *
 * The extern ones (sumo_*, r600_*, evergreen_*) are declared directly in
 * this file rather than pulled in from a header.
 * NOTE(review): such local extern prototypes have to be kept in sync with
 * the real definitions by hand -- confirm they still match.
 */
121 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
122 static void si_pcie_gen3_enable(struct radeon_device *rdev);
123 static void si_program_aspm(struct radeon_device *rdev);
124 extern void sumo_rlc_fini(struct radeon_device *rdev);
125 extern int sumo_rlc_init(struct radeon_device *rdev);
126 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
127 extern void r600_ih_ring_fini(struct radeon_device *rdev);
128 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
129 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
130 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
131 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
132 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
133 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
134 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
135                                          bool enable);
136 static void si_init_pg(struct radeon_device *rdev);
137 static void si_init_cg(struct radeon_device *rdev);
138 static void si_fini_pg(struct radeon_device *rdev);
139 static void si_fini_cg(struct radeon_device *rdev);
140 static void si_rlc_stop(struct radeon_device *rdev);
141
/*
 * Verde RLC save/restore register list (going by the name; the consumer
 * of this table is not visible in this part of the file).
 *
 * Layout as written here: almost every entry is a pair of words,
 *
 *     (<16-bit selector> << 16) | (<offset> >> 2),
 *     0x00000000,
 *
 * The selectors that occur are 0x8000, 0x8001, 0x8040, 0x8041 and 0x9c00.
 * Their meaning is not defined in this chunk -- TODO confirm against the
 * code that consumes the list.  The ">> 2" looks like a byte-offset to
 * dword-index conversion, but that is an assumption as well.
 *
 * Two irregularities, both kept exactly as found:
 *  - a lone 0x3 word sits between the 0xac54 and the 0x98f8 entries;
 *  - the word after the 0xac54 entry is spelled with an upper-case prefix
 *    (0X00000000); it has the same value as 0x00000000.
 *
 * The list ends with a single 0x00000000 word.
 */
142 static const u32 verde_rlc_save_restore_register_list[] =
143 {
144         (0x8000 << 16) | (0x98f4 >> 2),
145         0x00000000,
146         (0x8040 << 16) | (0x98f4 >> 2),
147         0x00000000,
148         (0x8000 << 16) | (0xe80 >> 2),
149         0x00000000,
150         (0x8040 << 16) | (0xe80 >> 2),
151         0x00000000,
152         (0x8000 << 16) | (0x89bc >> 2),
153         0x00000000,
154         (0x8040 << 16) | (0x89bc >> 2),
155         0x00000000,
156         (0x8000 << 16) | (0x8c1c >> 2),
157         0x00000000,
158         (0x8040 << 16) | (0x8c1c >> 2),
159         0x00000000,
160         (0x9c00 << 16) | (0x98f0 >> 2),
161         0x00000000,
162         (0x9c00 << 16) | (0xe7c >> 2),
163         0x00000000,
164         (0x8000 << 16) | (0x9148 >> 2),
165         0x00000000,
166         (0x8040 << 16) | (0x9148 >> 2),
167         0x00000000,
168         (0x9c00 << 16) | (0x9150 >> 2),
169         0x00000000,
170         (0x9c00 << 16) | (0x897c >> 2),
171         0x00000000,
172         (0x9c00 << 16) | (0x8d8c >> 2),
173         0x00000000,
174         (0x9c00 << 16) | (0xac54 >> 2),
175         0X00000000,
176         0x3,
177         (0x9c00 << 16) | (0x98f8 >> 2),
178         0x00000000,
179         (0x9c00 << 16) | (0x9910 >> 2),
180         0x00000000,
181         (0x9c00 << 16) | (0x9914 >> 2),
182         0x00000000,
183         (0x9c00 << 16) | (0x9918 >> 2),
184         0x00000000,
185         (0x9c00 << 16) | (0x991c >> 2),
186         0x00000000,
187         (0x9c00 << 16) | (0x9920 >> 2),
188         0x00000000,
189         (0x9c00 << 16) | (0x9924 >> 2),
190         0x00000000,
191         (0x9c00 << 16) | (0x9928 >> 2),
192         0x00000000,
193         (0x9c00 << 16) | (0x992c >> 2),
194         0x00000000,
195         (0x9c00 << 16) | (0x9930 >> 2),
196         0x00000000,
197         (0x9c00 << 16) | (0x9934 >> 2),
198         0x00000000,
199         (0x9c00 << 16) | (0x9938 >> 2),
200         0x00000000,
201         (0x9c00 << 16) | (0x993c >> 2),
202         0x00000000,
203         (0x9c00 << 16) | (0x9940 >> 2),
204         0x00000000,
205         (0x9c00 << 16) | (0x9944 >> 2),
206         0x00000000,
207         (0x9c00 << 16) | (0x9948 >> 2),
208         0x00000000,
209         (0x9c00 << 16) | (0x994c >> 2),
210         0x00000000,
211         (0x9c00 << 16) | (0x9950 >> 2),
212         0x00000000,
213         (0x9c00 << 16) | (0x9954 >> 2),
214         0x00000000,
215         (0x9c00 << 16) | (0x9958 >> 2),
216         0x00000000,
217         (0x9c00 << 16) | (0x995c >> 2),
218         0x00000000,
219         (0x9c00 << 16) | (0x9960 >> 2),
220         0x00000000,
221         (0x9c00 << 16) | (0x9964 >> 2),
222         0x00000000,
223         (0x9c00 << 16) | (0x9968 >> 2),
224         0x00000000,
225         (0x9c00 << 16) | (0x996c >> 2),
226         0x00000000,
227         (0x9c00 << 16) | (0x9970 >> 2),
228         0x00000000,
229         (0x9c00 << 16) | (0x9974 >> 2),
230         0x00000000,
231         (0x9c00 << 16) | (0x9978 >> 2),
232         0x00000000,
233         (0x9c00 << 16) | (0x997c >> 2),
234         0x00000000,
235         (0x9c00 << 16) | (0x9980 >> 2),
236         0x00000000,
237         (0x9c00 << 16) | (0x9984 >> 2),
238         0x00000000,
239         (0x9c00 << 16) | (0x9988 >> 2),
240         0x00000000,
241         (0x9c00 << 16) | (0x998c >> 2),
242         0x00000000,
243         (0x9c00 << 16) | (0x8c00 >> 2),
244         0x00000000,
245         (0x9c00 << 16) | (0x8c14 >> 2),
246         0x00000000,
247         (0x9c00 << 16) | (0x8c04 >> 2),
248         0x00000000,
249         (0x9c00 << 16) | (0x8c08 >> 2),
250         0x00000000,
251         (0x8000 << 16) | (0x9b7c >> 2),
252         0x00000000,
253         (0x8040 << 16) | (0x9b7c >> 2),
254         0x00000000,
255         (0x8000 << 16) | (0xe84 >> 2),
256         0x00000000,
257         (0x8040 << 16) | (0xe84 >> 2),
258         0x00000000,
259         (0x8000 << 16) | (0x89c0 >> 2),
260         0x00000000,
261         (0x8040 << 16) | (0x89c0 >> 2),
262         0x00000000,
263         (0x8000 << 16) | (0x914c >> 2),
264         0x00000000,
265         (0x8040 << 16) | (0x914c >> 2),
266         0x00000000,
267         (0x8000 << 16) | (0x8c20 >> 2),
268         0x00000000,
269         (0x8040 << 16) | (0x8c20 >> 2),
270         0x00000000,
271         (0x8000 << 16) | (0x9354 >> 2),
272         0x00000000,
273         (0x8040 << 16) | (0x9354 >> 2),
274         0x00000000,
275         (0x9c00 << 16) | (0x9060 >> 2),
276         0x00000000,
277         (0x9c00 << 16) | (0x9364 >> 2),
278         0x00000000,
279         (0x9c00 << 16) | (0x9100 >> 2),
280         0x00000000,
281         (0x9c00 << 16) | (0x913c >> 2),
282         0x00000000,
283         (0x8000 << 16) | (0x90e0 >> 2),
284         0x00000000,
285         (0x8000 << 16) | (0x90e4 >> 2),
286         0x00000000,
287         (0x8000 << 16) | (0x90e8 >> 2),
288         0x00000000,
289         (0x8040 << 16) | (0x90e0 >> 2),
290         0x00000000,
291         (0x8040 << 16) | (0x90e4 >> 2),
292         0x00000000,
293         (0x8040 << 16) | (0x90e8 >> 2),
294         0x00000000,
295         (0x9c00 << 16) | (0x8bcc >> 2),
296         0x00000000,
297         (0x9c00 << 16) | (0x8b24 >> 2),
298         0x00000000,
299         (0x9c00 << 16) | (0x88c4 >> 2),
300         0x00000000,
301         (0x9c00 << 16) | (0x8e50 >> 2),
302         0x00000000,
303         (0x9c00 << 16) | (0x8c0c >> 2),
304         0x00000000,
305         (0x9c00 << 16) | (0x8e58 >> 2),
306         0x00000000,
307         (0x9c00 << 16) | (0x8e5c >> 2),
308         0x00000000,
309         (0x9c00 << 16) | (0x9508 >> 2),
310         0x00000000,
311         (0x9c00 << 16) | (0x950c >> 2),
312         0x00000000,
313         (0x9c00 << 16) | (0x9494 >> 2),
314         0x00000000,
315         (0x9c00 << 16) | (0xac0c >> 2),
316         0x00000000,
317         (0x9c00 << 16) | (0xac10 >> 2),
318         0x00000000,
319         (0x9c00 << 16) | (0xac14 >> 2),
320         0x00000000,
321         (0x9c00 << 16) | (0xae00 >> 2),
322         0x00000000,
323         (0x9c00 << 16) | (0xac08 >> 2),
324         0x00000000,
325         (0x9c00 << 16) | (0x88d4 >> 2),
326         0x00000000,
327         (0x9c00 << 16) | (0x88c8 >> 2),
328         0x00000000,
329         (0x9c00 << 16) | (0x88cc >> 2),
330         0x00000000,
331         (0x9c00 << 16) | (0x89b0 >> 2),
332         0x00000000,
333         (0x9c00 << 16) | (0x8b10 >> 2),
334         0x00000000,
335         (0x9c00 << 16) | (0x8a14 >> 2),
336         0x00000000,
337         (0x9c00 << 16) | (0x9830 >> 2),
338         0x00000000,
339         (0x9c00 << 16) | (0x9834 >> 2),
340         0x00000000,
341         (0x9c00 << 16) | (0x9838 >> 2),
342         0x00000000,
343         (0x9c00 << 16) | (0x9a10 >> 2),
344         0x00000000,
345         (0x8000 << 16) | (0x9870 >> 2),
346         0x00000000,
347         (0x8000 << 16) | (0x9874 >> 2),
348         0x00000000,
349         (0x8001 << 16) | (0x9870 >> 2),
350         0x00000000,
351         (0x8001 << 16) | (0x9874 >> 2),
352         0x00000000,
353         (0x8040 << 16) | (0x9870 >> 2),
354         0x00000000,
355         (0x8040 << 16) | (0x9874 >> 2),
356         0x00000000,
357         (0x8041 << 16) | (0x9870 >> 2),
358         0x00000000,
359         (0x8041 << 16) | (0x9874 >> 2),
360         0x00000000,
361         0x00000000
362 };
363
/*
 * Tahiti "golden" RLC register settings.
 *
 * Written as three words per row.  The rows are presumably
 * { register offset, mask, value } triples -- the code that walks this
 * array is not visible in this chunk, so confirm before relying on it.
 * Every row here uses 0xffffffff as its middle word.
 */
364 static const u32 tahiti_golden_rlc_registers[] =
365 {
366         0xc424, 0xffffffff, 0x00601005,
367         0xc47c, 0xffffffff, 0x10104040,
368         0xc488, 0xffffffff, 0x0100000a,
369         0xc314, 0xffffffff, 0x00000800,
370         0xc30c, 0xffffffff, 0x800000f4,
371         0xf4a8, 0xffffffff, 0x00000000
372 };
373
/*
 * Tahiti "golden" register settings.
 *
 * Written as three words per row.  The rows are presumably
 * { register offset, mask, value } triples -- the code that walks this
 * array is not visible in this chunk, so confirm before relying on it.
 *
 * The third word of the 0x30 row is spelled 0x0040; that is the same
 * value as 0x00000040.
 */
374 static const u32 tahiti_golden_registers[] =
375 {
376         0x9a10, 0x00010000, 0x00018208,
377         0x9830, 0xffffffff, 0x00000000,
378         0x9834, 0xf00fffff, 0x00000400,
379         0x9838, 0x0002021c, 0x00020200,
380         0xc78, 0x00000080, 0x00000000,
381         0xd030, 0x000300c0, 0x00800040,
382         0xd830, 0x000300c0, 0x00800040,
383         0x5bb0, 0x000000f0, 0x00000070,
384         0x5bc0, 0x00200000, 0x50100000,
385         0x7030, 0x31000311, 0x00000011,
386         0x277c, 0x00000003, 0x000007ff,
387         0x240c, 0x000007ff, 0x00000000,
388         0x8a14, 0xf000001f, 0x00000007,
389         0x8b24, 0xffffffff, 0x00ffffff,
390         0x8b10, 0x0000ff0f, 0x00000000,
391         0x28a4c, 0x07ffffff, 0x4e000000,
392         0x28350, 0x3f3f3fff, 0x2a00126a,
393         0x30, 0x000000ff, 0x0040,
394         0x34, 0x00000040, 0x00004040,
395         0x9100, 0x07ffffff, 0x03000000,
396         0x8e88, 0x01ff1f3f, 0x00000000,
397         0x8e84, 0x01ff1f3f, 0x00000000,
398         0x9060, 0x0000007f, 0x00000020,
399         0x9508, 0x00010000, 0x00010000,
400         0xac14, 0x00000200, 0x000002fb,
401         0xac10, 0xffffffff, 0x0000543b,
402         0xac0c, 0xffffffff, 0xa9210876,
403         0x88d0, 0xffffffff, 0x000fff40,
404         0x88d4, 0x0000001f, 0x00000010,
405         0x1410, 0x20000000, 0x20fffed8,
406         0x15c0, 0x000c0fc0, 0x000c0400
407 };
408
/*
 * Second Tahiti "golden" register table; it holds a single three-word row.
 * Presumably a { register offset, mask, value } triple -- the consumer is
 * not visible in this chunk.  Why this row lives in its own table cannot
 * be told from here.
 */
409 static const u32 tahiti_golden_registers2[] =
410 {
411         0xc64, 0x00000001, 0x00000001
412 };
413
/*
 * Pitcairn "golden" RLC register settings.
 *
 * Written as three words per row.  The rows are presumably
 * { register offset, mask, value } triples -- the code that walks this
 * array is not visible in this chunk, so confirm before relying on it.
 * Every row here uses 0xffffffff as its middle word.
 */
414 static const u32 pitcairn_golden_rlc_registers[] =
415 {
416         0xc424, 0xffffffff, 0x00601004,
417         0xc47c, 0xffffffff, 0x10102020,
418         0xc488, 0xffffffff, 0x01000020,
419         0xc314, 0xffffffff, 0x00000800,
420         0xc30c, 0xffffffff, 0x800000a4
421 };
422
/*
 * Pitcairn "golden" register settings.
 *
 * Written as three words per row.  The rows are presumably
 * { register offset, mask, value } triples -- the code that walks this
 * array is not visible in this chunk, so confirm before relying on it.
 *
 * The third word of the 0x30 row is spelled 0x0040; that is the same
 * value as 0x00000040.
 */
423 static const u32 pitcairn_golden_registers[] =
424 {
425         0x9a10, 0x00010000, 0x00018208,
426         0x9830, 0xffffffff, 0x00000000,
427         0x9834, 0xf00fffff, 0x00000400,
428         0x9838, 0x0002021c, 0x00020200,
429         0xc78, 0x00000080, 0x00000000,
430         0xd030, 0x000300c0, 0x00800040,
431         0xd830, 0x000300c0, 0x00800040,
432         0x5bb0, 0x000000f0, 0x00000070,
433         0x5bc0, 0x00200000, 0x50100000,
434         0x7030, 0x31000311, 0x00000011,
435         0x2ae4, 0x00073ffe, 0x000022a2,
436         0x240c, 0x000007ff, 0x00000000,
437         0x8a14, 0xf000001f, 0x00000007,
438         0x8b24, 0xffffffff, 0x00ffffff,
439         0x8b10, 0x0000ff0f, 0x00000000,
440         0x28a4c, 0x07ffffff, 0x4e000000,
441         0x28350, 0x3f3f3fff, 0x2a00126a,
442         0x30, 0x000000ff, 0x0040,
443         0x34, 0x00000040, 0x00004040,
444         0x9100, 0x07ffffff, 0x03000000,
445         0x9060, 0x0000007f, 0x00000020,
446         0x9508, 0x00010000, 0x00010000,
447         0xac14, 0x000003ff, 0x000000f7,
448         0xac10, 0xffffffff, 0x00000000,
449         0xac0c, 0xffffffff, 0x32761054,
450         0x88d4, 0x0000001f, 0x00000010,
451         0x15c0, 0x000c0fc0, 0x000c0400
452 };
453
/*
 * Verde "golden" RLC register settings.
 *
 * Written as three words per row.  The rows are presumably
 * { register offset, mask, value } triples -- the code that walks this
 * array is not visible in this chunk, so confirm before relying on it.
 * Every row here uses 0xffffffff as its middle word.
 */
454 static const u32 verde_golden_rlc_registers[] =
455 {
456         0xc424, 0xffffffff, 0x033f1005,
457         0xc47c, 0xffffffff, 0x10808020,
458         0xc488, 0xffffffff, 0x00800008,
459         0xc314, 0xffffffff, 0x00001000,
460         0xc30c, 0xffffffff, 0x80010014
461 };
462
/*
 * Verde "golden" register settings.
 *
 * Written as three words per row.  The rows are presumably
 * { register offset, mask, value } triples -- the code that walks this
 * array is not visible in this chunk, so confirm before relying on it.
 *
 * Many rows appear two or three times back to back with identical
 * contents (0xd030, 0xd830, 0x2ae4, 0x240c, 0x8a14, 0x28350, 0x9100,
 * 0x8e88, 0x8e84, 0xac14, 0xac10, 0xac0c, 0x88d4).
 * NOTE(review): whether the repetition is required or merely redundant
 * cannot be told from this table alone; it is left exactly as found.
 *
 * The third word of the 0x30 row is spelled 0x0040; that is the same
 * value as 0x00000040.
 */
463 static const u32 verde_golden_registers[] =
464 {
465         0x9a10, 0x00010000, 0x00018208,
466         0x9830, 0xffffffff, 0x00000000,
467         0x9834, 0xf00fffff, 0x00000400,
468         0x9838, 0x0002021c, 0x00020200,
469         0xc78, 0x00000080, 0x00000000,
470         0xd030, 0x000300c0, 0x00800040,
471         0xd030, 0x000300c0, 0x00800040,
472         0xd830, 0x000300c0, 0x00800040,
473         0xd830, 0x000300c0, 0x00800040,
474         0x5bb0, 0x000000f0, 0x00000070,
475         0x5bc0, 0x00200000, 0x50100000,
476         0x7030, 0x31000311, 0x00000011,
477         0x2ae4, 0x00073ffe, 0x000022a2,
478         0x2ae4, 0x00073ffe, 0x000022a2,
479         0x2ae4, 0x00073ffe, 0x000022a2,
480         0x240c, 0x000007ff, 0x00000000,
481         0x240c, 0x000007ff, 0x00000000,
482         0x240c, 0x000007ff, 0x00000000,
483         0x8a14, 0xf000001f, 0x00000007,
484         0x8a14, 0xf000001f, 0x00000007,
485         0x8a14, 0xf000001f, 0x00000007,
486         0x8b24, 0xffffffff, 0x00ffffff,
487         0x8b10, 0x0000ff0f, 0x00000000,
488         0x28a4c, 0x07ffffff, 0x4e000000,
489         0x28350, 0x3f3f3fff, 0x0000124a,
490         0x28350, 0x3f3f3fff, 0x0000124a,
491         0x28350, 0x3f3f3fff, 0x0000124a,
492         0x30, 0x000000ff, 0x0040,
493         0x34, 0x00000040, 0x00004040,
494         0x9100, 0x07ffffff, 0x03000000,
495         0x9100, 0x07ffffff, 0x03000000,
496         0x8e88, 0x01ff1f3f, 0x00000000,
497         0x8e88, 0x01ff1f3f, 0x00000000,
498         0x8e88, 0x01ff1f3f, 0x00000000,
499         0x8e84, 0x01ff1f3f, 0x00000000,
500         0x8e84, 0x01ff1f3f, 0x00000000,
501         0x8e84, 0x01ff1f3f, 0x00000000,
502         0x9060, 0x0000007f, 0x00000020,
503         0x9508, 0x00010000, 0x00010000,
504         0xac14, 0x000003ff, 0x00000003,
505         0xac14, 0x000003ff, 0x00000003,
506         0xac14, 0x000003ff, 0x00000003,
507         0xac10, 0xffffffff, 0x00000000,
508         0xac10, 0xffffffff, 0x00000000,
509         0xac10, 0xffffffff, 0x00000000,
510         0xac0c, 0xffffffff, 0x00001032,
511         0xac0c, 0xffffffff, 0x00001032,
512         0xac0c, 0xffffffff, 0x00001032,
513         0x88d4, 0x0000001f, 0x00000010,
514         0x88d4, 0x0000001f, 0x00000010,
515         0x88d4, 0x0000001f, 0x00000010,
516         0x15c0, 0x000c0fc0, 0x000c0400
517 };
518
/*
 * Oland "golden" RLC register settings.
 *
 * Written as three words per row.  The rows are presumably
 * { register offset, mask, value } triples -- the code that walks this
 * array is not visible in this chunk, so confirm before relying on it.
 * Every row here uses 0xffffffff as its middle word.
 */
519 static const u32 oland_golden_rlc_registers[] =
520 {
521         0xc424, 0xffffffff, 0x00601005,
522         0xc47c, 0xffffffff, 0x10104040,
523         0xc488, 0xffffffff, 0x0100000a,
524         0xc314, 0xffffffff, 0x00000800,
525         0xc30c, 0xffffffff, 0x800000f4
526 };
527
/*
 * Oland "golden" register settings.
 *
 * Written as three words per row.  The rows are presumably
 * { register offset, mask, value } triples -- the code that walks this
 * array is not visible in this chunk, so confirm before relying on it.
 *
 * The third word of the 0x30 row is spelled 0x0040; that is the same
 * value as 0x00000040.
 */
528 static const u32 oland_golden_registers[] =
529 {
530         0x9a10, 0x00010000, 0x00018208,
531         0x9830, 0xffffffff, 0x00000000,
532         0x9834, 0xf00fffff, 0x00000400,
533         0x9838, 0x0002021c, 0x00020200,
534         0xc78, 0x00000080, 0x00000000,
535         0xd030, 0x000300c0, 0x00800040,
536         0xd830, 0x000300c0, 0x00800040,
537         0x5bb0, 0x000000f0, 0x00000070,
538         0x5bc0, 0x00200000, 0x50100000,
539         0x7030, 0x31000311, 0x00000011,
540         0x2ae4, 0x00073ffe, 0x000022a2,
541         0x240c, 0x000007ff, 0x00000000,
542         0x8a14, 0xf000001f, 0x00000007,
543         0x8b24, 0xffffffff, 0x00ffffff,
544         0x8b10, 0x0000ff0f, 0x00000000,
545         0x28a4c, 0x07ffffff, 0x4e000000,
546         0x28350, 0x3f3f3fff, 0x00000082,
547         0x30, 0x000000ff, 0x0040,
548         0x34, 0x00000040, 0x00004040,
549         0x9100, 0x07ffffff, 0x03000000,
550         0x9060, 0x0000007f, 0x00000020,
551         0x9508, 0x00010000, 0x00010000,
552         0xac14, 0x000003ff, 0x000000f3,
553         0xac10, 0xffffffff, 0x00000000,
554         0xac0c, 0xffffffff, 0x00003210,
555         0x88d4, 0x0000001f, 0x00000010,
556         0x15c0, 0x000c0fc0, 0x000c0400
557 };
558
/*
 * Hainan "golden" register settings.
 *
 * Written as three words per row.  The rows are presumably
 * { register offset, mask, value } triples -- the code that walks this
 * array is not visible in this chunk, so confirm before relying on it.
 *
 * The third word of the 0x30 row is spelled 0x0040; that is the same
 * value as 0x00000040.
 */
559 static const u32 hainan_golden_registers[] =
560 {
561         0x9a10, 0x00010000, 0x00018208,
562         0x9830, 0xffffffff, 0x00000000,
563         0x9834, 0xf00fffff, 0x00000400,
564         0x9838, 0x0002021c, 0x00020200,
565         0xd0c0, 0xff000fff, 0x00000100,
566         0xd030, 0x000300c0, 0x00800040,
567         0xd8c0, 0xff000fff, 0x00000100,
568         0xd830, 0x000300c0, 0x00800040,
569         0x2ae4, 0x00073ffe, 0x000022a2,
570         0x240c, 0x000007ff, 0x00000000,
571         0x8a14, 0xf000001f, 0x00000007,
572         0x8b24, 0xffffffff, 0x00ffffff,
573         0x8b10, 0x0000ff0f, 0x00000000,
574         0x28a4c, 0x07ffffff, 0x4e000000,
575         0x28350, 0x3f3f3fff, 0x00000000,
576         0x30, 0x000000ff, 0x0040,
577         0x34, 0x00000040, 0x00004040,
578         0x9100, 0x03e00000, 0x03600000,
579         0x9060, 0x0000007f, 0x00000020,
580         0x9508, 0x00010000, 0x00010000,
581         0xac14, 0x000003ff, 0x000000f1,
582         0xac10, 0xffffffff, 0x00000000,
583         0xac0c, 0xffffffff, 0x00003210,
584         0x88d4, 0x0000001f, 0x00000010,
585         0x15c0, 0x000c0fc0, 0x000c0400
586 };
587
/*
 * Second Hainan "golden" register table; it holds a single three-word row.
 * Presumably a { register offset, mask, value } triple -- the consumer is
 * not visible in this chunk.  Why this row lives in its own table cannot
 * be told from here.
 */
588 static const u32 hainan_golden_registers2[] =
589 {
590         0x98f8, 0xffffffff, 0x02010001
591 };
592
/*
 * Tahiti MGCG/CGCG init table (going by the name, clock-gating setup; the
 * consumer of this table is not visible in this part of the file).
 *
 * Written as three words per row.  The rows are presumably
 * { register offset, mask, value } triples -- TODO confirm against the
 * code that walks the array.
 *
 * Things visible in the data itself:
 *  - the row "0x802c, 0xffffffff, 0xe0000000" occurs twice, once near the
 *    top and once directly before the 0x9160 run;
 *  - first words 0x9160 through 0x929c form one contiguous run in steps
 *    of 4.
 */
593 static const u32 tahiti_mgcg_cgcg_init[] =
594 {
595         0xc400, 0xffffffff, 0xfffffffc,
596         0x802c, 0xffffffff, 0xe0000000,
597         0x9a60, 0xffffffff, 0x00000100,
598         0x92a4, 0xffffffff, 0x00000100,
599         0xc164, 0xffffffff, 0x00000100,
600         0x9774, 0xffffffff, 0x00000100,
601         0x8984, 0xffffffff, 0x06000100,
602         0x8a18, 0xffffffff, 0x00000100,
603         0x92a0, 0xffffffff, 0x00000100,
604         0xc380, 0xffffffff, 0x00000100,
605         0x8b28, 0xffffffff, 0x00000100,
606         0x9144, 0xffffffff, 0x00000100,
607         0x8d88, 0xffffffff, 0x00000100,
608         0x8d8c, 0xffffffff, 0x00000100,
609         0x9030, 0xffffffff, 0x00000100,
610         0x9034, 0xffffffff, 0x00000100,
611         0x9038, 0xffffffff, 0x00000100,
612         0x903c, 0xffffffff, 0x00000100,
613         0xad80, 0xffffffff, 0x00000100,
614         0xac54, 0xffffffff, 0x00000100,
615         0x897c, 0xffffffff, 0x06000100,
616         0x9868, 0xffffffff, 0x00000100,
617         0x9510, 0xffffffff, 0x00000100,
618         0xaf04, 0xffffffff, 0x00000100,
619         0xae04, 0xffffffff, 0x00000100,
620         0x949c, 0xffffffff, 0x00000100,
621         0x802c, 0xffffffff, 0xe0000000,
622         0x9160, 0xffffffff, 0x00010000,
623         0x9164, 0xffffffff, 0x00030002,
624         0x9168, 0xffffffff, 0x00040007,
625         0x916c, 0xffffffff, 0x00060005,
626         0x9170, 0xffffffff, 0x00090008,
627         0x9174, 0xffffffff, 0x00020001,
628         0x9178, 0xffffffff, 0x00040003,
629         0x917c, 0xffffffff, 0x00000007,
630         0x9180, 0xffffffff, 0x00060005,
631         0x9184, 0xffffffff, 0x00090008,
632         0x9188, 0xffffffff, 0x00030002,
633         0x918c, 0xffffffff, 0x00050004,
634         0x9190, 0xffffffff, 0x00000008,
635         0x9194, 0xffffffff, 0x00070006,
636         0x9198, 0xffffffff, 0x000a0009,
637         0x919c, 0xffffffff, 0x00040003,
638         0x91a0, 0xffffffff, 0x00060005,
639         0x91a4, 0xffffffff, 0x00000009,
640         0x91a8, 0xffffffff, 0x00080007,
641         0x91ac, 0xffffffff, 0x000b000a,
642         0x91b0, 0xffffffff, 0x00050004,
643         0x91b4, 0xffffffff, 0x00070006,
644         0x91b8, 0xffffffff, 0x0008000b,
645         0x91bc, 0xffffffff, 0x000a0009,
646         0x91c0, 0xffffffff, 0x000d000c,
647         0x91c4, 0xffffffff, 0x00060005,
648         0x91c8, 0xffffffff, 0x00080007,
649         0x91cc, 0xffffffff, 0x0000000b,
650         0x91d0, 0xffffffff, 0x000a0009,
651         0x91d4, 0xffffffff, 0x000d000c,
652         0x91d8, 0xffffffff, 0x00070006,
653         0x91dc, 0xffffffff, 0x00090008,
654         0x91e0, 0xffffffff, 0x0000000c,
655         0x91e4, 0xffffffff, 0x000b000a,
656         0x91e8, 0xffffffff, 0x000e000d,
657         0x91ec, 0xffffffff, 0x00080007,
658         0x91f0, 0xffffffff, 0x000a0009,
659         0x91f4, 0xffffffff, 0x0000000d,
660         0x91f8, 0xffffffff, 0x000c000b,
661         0x91fc, 0xffffffff, 0x000f000e,
662         0x9200, 0xffffffff, 0x00090008,
663         0x9204, 0xffffffff, 0x000b000a,
664         0x9208, 0xffffffff, 0x000c000f,
665         0x920c, 0xffffffff, 0x000e000d,
666         0x9210, 0xffffffff, 0x00110010,
667         0x9214, 0xffffffff, 0x000a0009,
668         0x9218, 0xffffffff, 0x000c000b,
669         0x921c, 0xffffffff, 0x0000000f,
670         0x9220, 0xffffffff, 0x000e000d,
671         0x9224, 0xffffffff, 0x00110010,
672         0x9228, 0xffffffff, 0x000b000a,
673         0x922c, 0xffffffff, 0x000d000c,
674         0x9230, 0xffffffff, 0x00000010,
675         0x9234, 0xffffffff, 0x000f000e,
676         0x9238, 0xffffffff, 0x00120011,
677         0x923c, 0xffffffff, 0x000c000b,
678         0x9240, 0xffffffff, 0x000e000d,
679         0x9244, 0xffffffff, 0x00000011,
680         0x9248, 0xffffffff, 0x0010000f,
681         0x924c, 0xffffffff, 0x00130012,
682         0x9250, 0xffffffff, 0x000d000c,
683         0x9254, 0xffffffff, 0x000f000e,
684         0x9258, 0xffffffff, 0x00100013,
685         0x925c, 0xffffffff, 0x00120011,
686         0x9260, 0xffffffff, 0x00150014,
687         0x9264, 0xffffffff, 0x000e000d,
688         0x9268, 0xffffffff, 0x0010000f,
689         0x926c, 0xffffffff, 0x00000013,
690         0x9270, 0xffffffff, 0x00120011,
691         0x9274, 0xffffffff, 0x00150014,
692         0x9278, 0xffffffff, 0x000f000e,
693         0x927c, 0xffffffff, 0x00110010,
694         0x9280, 0xffffffff, 0x00000014,
695         0x9284, 0xffffffff, 0x00130012,
696         0x9288, 0xffffffff, 0x00160015,
697         0x928c, 0xffffffff, 0x0010000f,
698         0x9290, 0xffffffff, 0x00120011,
699         0x9294, 0xffffffff, 0x00000015,
700         0x9298, 0xffffffff, 0x00140013,
701         0x929c, 0xffffffff, 0x00170016,
702         0x9150, 0xffffffff, 0x96940200,
703         0x8708, 0xffffffff, 0x00900100,
704         0xc478, 0xffffffff, 0x00000080,
705         0xc404, 0xffffffff, 0x0020003f,
706         0x30, 0xffffffff, 0x0000001c,
707         0x34, 0x000f0000, 0x000f0000,
708         0x160c, 0xffffffff, 0x00000100,
709         0x1024, 0xffffffff, 0x00000100,
710         0x102c, 0x00000101, 0x00000000,
711         0x20a8, 0xffffffff, 0x00000104,
712         0x264c, 0x000c0000, 0x000c0000,
713         0x2648, 0x000c0000, 0x000c0000,
714         0x55e4, 0xff000fff, 0x00000100,
715         0x55e8, 0x00000001, 0x00000001,
716         0x2f50, 0x00000001, 0x00000001,
717         0x30cc, 0xc0000fff, 0x00000104,
718         0xc1e4, 0x00000001, 0x00000001,
719         0xd0c0, 0xfffffff0, 0x00000100,
720         0xd8c0, 0xfffffff0, 0x00000100
721 };
722
/*
 * Pitcairn MGCG/CGCG init table (going by the name, clock-gating setup;
 * the consumer of this table is not visible in this part of the file).
 *
 * Written as three words per row.  The rows are presumably
 * { register offset, mask, value } triples -- TODO confirm against the
 * code that walks the array.
 *
 * Things visible in the data itself:
 *  - the row "0x802c, 0xffffffff, 0xe0000000" occurs twice, once near the
 *    top and once directly before the 0x9160 run;
 *  - the 0x91xx/0x92xx rows cover 0x9160-0x91c0 and 0x9200-0x9260 in
 *    steps of 4; 0x91c4-0x91fc are not listed.
 *    NOTE(review): the gap looks deliberate but cannot be verified from
 *    this table alone.
 */
723 static const u32 pitcairn_mgcg_cgcg_init[] =
724 {
725         0xc400, 0xffffffff, 0xfffffffc,
726         0x802c, 0xffffffff, 0xe0000000,
727         0x9a60, 0xffffffff, 0x00000100,
728         0x92a4, 0xffffffff, 0x00000100,
729         0xc164, 0xffffffff, 0x00000100,
730         0x9774, 0xffffffff, 0x00000100,
731         0x8984, 0xffffffff, 0x06000100,
732         0x8a18, 0xffffffff, 0x00000100,
733         0x92a0, 0xffffffff, 0x00000100,
734         0xc380, 0xffffffff, 0x00000100,
735         0x8b28, 0xffffffff, 0x00000100,
736         0x9144, 0xffffffff, 0x00000100,
737         0x8d88, 0xffffffff, 0x00000100,
738         0x8d8c, 0xffffffff, 0x00000100,
739         0x9030, 0xffffffff, 0x00000100,
740         0x9034, 0xffffffff, 0x00000100,
741         0x9038, 0xffffffff, 0x00000100,
742         0x903c, 0xffffffff, 0x00000100,
743         0xad80, 0xffffffff, 0x00000100,
744         0xac54, 0xffffffff, 0x00000100,
745         0x897c, 0xffffffff, 0x06000100,
746         0x9868, 0xffffffff, 0x00000100,
747         0x9510, 0xffffffff, 0x00000100,
748         0xaf04, 0xffffffff, 0x00000100,
749         0xae04, 0xffffffff, 0x00000100,
750         0x949c, 0xffffffff, 0x00000100,
751         0x802c, 0xffffffff, 0xe0000000,
752         0x9160, 0xffffffff, 0x00010000,
753         0x9164, 0xffffffff, 0x00030002,
754         0x9168, 0xffffffff, 0x00040007,
755         0x916c, 0xffffffff, 0x00060005,
756         0x9170, 0xffffffff, 0x00090008,
757         0x9174, 0xffffffff, 0x00020001,
758         0x9178, 0xffffffff, 0x00040003,
759         0x917c, 0xffffffff, 0x00000007,
760         0x9180, 0xffffffff, 0x00060005,
761         0x9184, 0xffffffff, 0x00090008,
762         0x9188, 0xffffffff, 0x00030002,
763         0x918c, 0xffffffff, 0x00050004,
764         0x9190, 0xffffffff, 0x00000008,
765         0x9194, 0xffffffff, 0x00070006,
766         0x9198, 0xffffffff, 0x000a0009,
767         0x919c, 0xffffffff, 0x00040003,
768         0x91a0, 0xffffffff, 0x00060005,
769         0x91a4, 0xffffffff, 0x00000009,
770         0x91a8, 0xffffffff, 0x00080007,
771         0x91ac, 0xffffffff, 0x000b000a,
772         0x91b0, 0xffffffff, 0x00050004,
773         0x91b4, 0xffffffff, 0x00070006,
774         0x91b8, 0xffffffff, 0x0008000b,
775         0x91bc, 0xffffffff, 0x000a0009,
776         0x91c0, 0xffffffff, 0x000d000c,
777         0x9200, 0xffffffff, 0x00090008,
778         0x9204, 0xffffffff, 0x000b000a,
779         0x9208, 0xffffffff, 0x000c000f,
780         0x920c, 0xffffffff, 0x000e000d,
781         0x9210, 0xffffffff, 0x00110010,
782         0x9214, 0xffffffff, 0x000a0009,
783         0x9218, 0xffffffff, 0x000c000b,
784         0x921c, 0xffffffff, 0x0000000f,
785         0x9220, 0xffffffff, 0x000e000d,
786         0x9224, 0xffffffff, 0x00110010,
787         0x9228, 0xffffffff, 0x000b000a,
788         0x922c, 0xffffffff, 0x000d000c,
789         0x9230, 0xffffffff, 0x00000010,
790         0x9234, 0xffffffff, 0x000f000e,
791         0x9238, 0xffffffff, 0x00120011,
792         0x923c, 0xffffffff, 0x000c000b,
793         0x9240, 0xffffffff, 0x000e000d,
794         0x9244, 0xffffffff, 0x00000011,
795         0x9248, 0xffffffff, 0x0010000f,
796         0x924c, 0xffffffff, 0x00130012,
797         0x9250, 0xffffffff, 0x000d000c,
798         0x9254, 0xffffffff, 0x000f000e,
799         0x9258, 0xffffffff, 0x00100013,
800         0x925c, 0xffffffff, 0x00120011,
801         0x9260, 0xffffffff, 0x00150014,
802         0x9150, 0xffffffff, 0x96940200,
803         0x8708, 0xffffffff, 0x00900100,
804         0xc478, 0xffffffff, 0x00000080,
805         0xc404, 0xffffffff, 0x0020003f,
806         0x30, 0xffffffff, 0x0000001c,
807         0x34, 0x000f0000, 0x000f0000,
808         0x160c, 0xffffffff, 0x00000100,
809         0x1024, 0xffffffff, 0x00000100,
810         0x102c, 0x00000101, 0x00000000,
811         0x20a8, 0xffffffff, 0x00000104,
812         0x55e4, 0xff000fff, 0x00000100,
813         0x55e8, 0x00000001, 0x00000001,
814         0x2f50, 0x00000001, 0x00000001,
815         0x30cc, 0xc0000fff, 0x00000104,
816         0xc1e4, 0x00000001, 0x00000001,
817         0xd0c0, 0xfffffff0, 0x00000100,
818         0xd8c0, 0xfffffff0, 0x00000100
819 };
820
/*
 * Register table for Verde parts.  si_init_golden_registers() passes this
 * array and its ARRAY_SIZE() to radeon_program_register_sequence() in the
 * CHIP_VERDE case, after verde_golden_registers and
 * verde_golden_rlc_registers and before verde_pg_init.
 *
 * NOTE(review): each row is presumably an (offset, mask, value) triplet and
 * "mgcg_cgcg" presumably refers to clock-gating setup - confirm against
 * radeon_program_register_sequence().  Some offsets (e.g. 0x802c) appear
 * more than once, so the row order should be left untouched.
 */
821 static const u32 verde_mgcg_cgcg_init[] =
822 {
823         0xc400, 0xffffffff, 0xfffffffc,
824         0x802c, 0xffffffff, 0xe0000000,
825         0x9a60, 0xffffffff, 0x00000100,
826         0x92a4, 0xffffffff, 0x00000100,
827         0xc164, 0xffffffff, 0x00000100,
828         0x9774, 0xffffffff, 0x00000100,
829         0x8984, 0xffffffff, 0x06000100,
830         0x8a18, 0xffffffff, 0x00000100,
831         0x92a0, 0xffffffff, 0x00000100,
832         0xc380, 0xffffffff, 0x00000100,
833         0x8b28, 0xffffffff, 0x00000100,
834         0x9144, 0xffffffff, 0x00000100,
835         0x8d88, 0xffffffff, 0x00000100,
836         0x8d8c, 0xffffffff, 0x00000100,
837         0x9030, 0xffffffff, 0x00000100,
838         0x9034, 0xffffffff, 0x00000100,
839         0x9038, 0xffffffff, 0x00000100,
840         0x903c, 0xffffffff, 0x00000100,
841         0xad80, 0xffffffff, 0x00000100,
842         0xac54, 0xffffffff, 0x00000100,
843         0x897c, 0xffffffff, 0x06000100,
844         0x9868, 0xffffffff, 0x00000100,
845         0x9510, 0xffffffff, 0x00000100,
846         0xaf04, 0xffffffff, 0x00000100,
847         0xae04, 0xffffffff, 0x00000100,
848         0x949c, 0xffffffff, 0x00000100,
849         0x802c, 0xffffffff, 0xe0000000,
850         0x9160, 0xffffffff, 0x00010000,
851         0x9164, 0xffffffff, 0x00030002,
852         0x9168, 0xffffffff, 0x00040007,
853         0x916c, 0xffffffff, 0x00060005,
854         0x9170, 0xffffffff, 0x00090008,
855         0x9174, 0xffffffff, 0x00020001,
856         0x9178, 0xffffffff, 0x00040003,
857         0x917c, 0xffffffff, 0x00000007,
858         0x9180, 0xffffffff, 0x00060005,
859         0x9184, 0xffffffff, 0x00090008,
860         0x9188, 0xffffffff, 0x00030002,
861         0x918c, 0xffffffff, 0x00050004,
862         0x9190, 0xffffffff, 0x00000008,
863         0x9194, 0xffffffff, 0x00070006,
864         0x9198, 0xffffffff, 0x000a0009,
865         0x919c, 0xffffffff, 0x00040003,
866         0x91a0, 0xffffffff, 0x00060005,
867         0x91a4, 0xffffffff, 0x00000009,
868         0x91a8, 0xffffffff, 0x00080007,
869         0x91ac, 0xffffffff, 0x000b000a,
870         0x91b0, 0xffffffff, 0x00050004,
871         0x91b4, 0xffffffff, 0x00070006,
872         0x91b8, 0xffffffff, 0x0008000b,
873         0x91bc, 0xffffffff, 0x000a0009,
874         0x91c0, 0xffffffff, 0x000d000c,
875         0x9200, 0xffffffff, 0x00090008,
876         0x9204, 0xffffffff, 0x000b000a,
877         0x9208, 0xffffffff, 0x000c000f,
878         0x920c, 0xffffffff, 0x000e000d,
879         0x9210, 0xffffffff, 0x00110010,
880         0x9214, 0xffffffff, 0x000a0009,
881         0x9218, 0xffffffff, 0x000c000b,
882         0x921c, 0xffffffff, 0x0000000f,
883         0x9220, 0xffffffff, 0x000e000d,
884         0x9224, 0xffffffff, 0x00110010,
885         0x9228, 0xffffffff, 0x000b000a,
886         0x922c, 0xffffffff, 0x000d000c,
887         0x9230, 0xffffffff, 0x00000010,
888         0x9234, 0xffffffff, 0x000f000e,
889         0x9238, 0xffffffff, 0x00120011,
890         0x923c, 0xffffffff, 0x000c000b,
891         0x9240, 0xffffffff, 0x000e000d,
892         0x9244, 0xffffffff, 0x00000011,
893         0x9248, 0xffffffff, 0x0010000f,
894         0x924c, 0xffffffff, 0x00130012,
895         0x9250, 0xffffffff, 0x000d000c,
896         0x9254, 0xffffffff, 0x000f000e,
897         0x9258, 0xffffffff, 0x00100013,
898         0x925c, 0xffffffff, 0x00120011,
899         0x9260, 0xffffffff, 0x00150014,
900         0x9150, 0xffffffff, 0x96940200,
901         0x8708, 0xffffffff, 0x00900100,
902         0xc478, 0xffffffff, 0x00000080,
903         0xc404, 0xffffffff, 0x0020003f,
904         0x30, 0xffffffff, 0x0000001c,
905         0x34, 0x000f0000, 0x000f0000,
906         0x160c, 0xffffffff, 0x00000100,
907         0x1024, 0xffffffff, 0x00000100,
908         0x102c, 0x00000101, 0x00000000,
909         0x20a8, 0xffffffff, 0x00000104,
910         0x264c, 0x000c0000, 0x000c0000,
911         0x2648, 0x000c0000, 0x000c0000,
912         0x55e4, 0xff000fff, 0x00000100,
913         0x55e8, 0x00000001, 0x00000001,
914         0x2f50, 0x00000001, 0x00000001,
915         0x30cc, 0xc0000fff, 0x00000104,
916         0xc1e4, 0x00000001, 0x00000001,
917         0xd0c0, 0xfffffff0, 0x00000100,
918         0xd8c0, 0xfffffff0, 0x00000100
919 };
920
/*
 * Register table for Oland parts.  si_init_golden_registers() passes this
 * array and its ARRAY_SIZE() to radeon_program_register_sequence() as the
 * last step of the CHIP_OLAND case, after oland_golden_registers and
 * oland_golden_rlc_registers.
 *
 * NOTE(review): each row is presumably an (offset, mask, value) triplet and
 * "mgcg_cgcg" presumably refers to clock-gating setup - confirm against
 * radeon_program_register_sequence().  Offset 0x802c appears twice, so the
 * row order should be left untouched.
 */
921 static const u32 oland_mgcg_cgcg_init[] =
922 {
923         0xc400, 0xffffffff, 0xfffffffc,
924         0x802c, 0xffffffff, 0xe0000000,
925         0x9a60, 0xffffffff, 0x00000100,
926         0x92a4, 0xffffffff, 0x00000100,
927         0xc164, 0xffffffff, 0x00000100,
928         0x9774, 0xffffffff, 0x00000100,
929         0x8984, 0xffffffff, 0x06000100,
930         0x8a18, 0xffffffff, 0x00000100,
931         0x92a0, 0xffffffff, 0x00000100,
932         0xc380, 0xffffffff, 0x00000100,
933         0x8b28, 0xffffffff, 0x00000100,
934         0x9144, 0xffffffff, 0x00000100,
935         0x8d88, 0xffffffff, 0x00000100,
936         0x8d8c, 0xffffffff, 0x00000100,
937         0x9030, 0xffffffff, 0x00000100,
938         0x9034, 0xffffffff, 0x00000100,
939         0x9038, 0xffffffff, 0x00000100,
940         0x903c, 0xffffffff, 0x00000100,
941         0xad80, 0xffffffff, 0x00000100,
942         0xac54, 0xffffffff, 0x00000100,
943         0x897c, 0xffffffff, 0x06000100,
944         0x9868, 0xffffffff, 0x00000100,
945         0x9510, 0xffffffff, 0x00000100,
946         0xaf04, 0xffffffff, 0x00000100,
947         0xae04, 0xffffffff, 0x00000100,
948         0x949c, 0xffffffff, 0x00000100,
949         0x802c, 0xffffffff, 0xe0000000,
950         0x9160, 0xffffffff, 0x00010000,
951         0x9164, 0xffffffff, 0x00030002,
952         0x9168, 0xffffffff, 0x00040007,
953         0x916c, 0xffffffff, 0x00060005,
954         0x9170, 0xffffffff, 0x00090008,
955         0x9174, 0xffffffff, 0x00020001,
956         0x9178, 0xffffffff, 0x00040003,
957         0x917c, 0xffffffff, 0x00000007,
958         0x9180, 0xffffffff, 0x00060005,
959         0x9184, 0xffffffff, 0x00090008,
960         0x9188, 0xffffffff, 0x00030002,
961         0x918c, 0xffffffff, 0x00050004,
962         0x9190, 0xffffffff, 0x00000008,
963         0x9194, 0xffffffff, 0x00070006,
964         0x9198, 0xffffffff, 0x000a0009,
965         0x919c, 0xffffffff, 0x00040003,
966         0x91a0, 0xffffffff, 0x00060005,
967         0x91a4, 0xffffffff, 0x00000009,
968         0x91a8, 0xffffffff, 0x00080007,
969         0x91ac, 0xffffffff, 0x000b000a,
970         0x91b0, 0xffffffff, 0x00050004,
971         0x91b4, 0xffffffff, 0x00070006,
972         0x91b8, 0xffffffff, 0x0008000b,
973         0x91bc, 0xffffffff, 0x000a0009,
974         0x91c0, 0xffffffff, 0x000d000c,
975         0x91c4, 0xffffffff, 0x00060005,
976         0x91c8, 0xffffffff, 0x00080007,
977         0x91cc, 0xffffffff, 0x0000000b,
978         0x91d0, 0xffffffff, 0x000a0009,
979         0x91d4, 0xffffffff, 0x000d000c,
980         0x9150, 0xffffffff, 0x96940200,
981         0x8708, 0xffffffff, 0x00900100,
982         0xc478, 0xffffffff, 0x00000080,
983         0xc404, 0xffffffff, 0x0020003f,
984         0x30, 0xffffffff, 0x0000001c,
985         0x34, 0x000f0000, 0x000f0000,
986         0x160c, 0xffffffff, 0x00000100,
987         0x1024, 0xffffffff, 0x00000100,
988         0x102c, 0x00000101, 0x00000000,
989         0x20a8, 0xffffffff, 0x00000104,
990         0x264c, 0x000c0000, 0x000c0000,
991         0x2648, 0x000c0000, 0x000c0000,
992         0x55e4, 0xff000fff, 0x00000100,
993         0x55e8, 0x00000001, 0x00000001,
994         0x2f50, 0x00000001, 0x00000001,
995         0x30cc, 0xc0000fff, 0x00000104,
996         0xc1e4, 0x00000001, 0x00000001,
997         0xd0c0, 0xfffffff0, 0x00000100,
998         0xd8c0, 0xfffffff0, 0x00000100
999 };
1000
/*
 * Register table for Hainan parts.  si_init_golden_registers() passes this
 * array and its ARRAY_SIZE() to radeon_program_register_sequence() as the
 * last step of the CHIP_HAINAN case, after hainan_golden_registers and
 * hainan_golden_registers2.
 *
 * Compared with oland_mgcg_cgcg_init, the rows for offsets 0x102c, 0x55e4
 * and 0x55e8 are absent here.
 *
 * NOTE(review): each row is presumably an (offset, mask, value) triplet and
 * "mgcg_cgcg" presumably refers to clock-gating setup - confirm against
 * radeon_program_register_sequence().  Offset 0x802c appears twice, so the
 * row order should be left untouched.
 */
1001 static const u32 hainan_mgcg_cgcg_init[] =
1002 {
1003         0xc400, 0xffffffff, 0xfffffffc,
1004         0x802c, 0xffffffff, 0xe0000000,
1005         0x9a60, 0xffffffff, 0x00000100,
1006         0x92a4, 0xffffffff, 0x00000100,
1007         0xc164, 0xffffffff, 0x00000100,
1008         0x9774, 0xffffffff, 0x00000100,
1009         0x8984, 0xffffffff, 0x06000100,
1010         0x8a18, 0xffffffff, 0x00000100,
1011         0x92a0, 0xffffffff, 0x00000100,
1012         0xc380, 0xffffffff, 0x00000100,
1013         0x8b28, 0xffffffff, 0x00000100,
1014         0x9144, 0xffffffff, 0x00000100,
1015         0x8d88, 0xffffffff, 0x00000100,
1016         0x8d8c, 0xffffffff, 0x00000100,
1017         0x9030, 0xffffffff, 0x00000100,
1018         0x9034, 0xffffffff, 0x00000100,
1019         0x9038, 0xffffffff, 0x00000100,
1020         0x903c, 0xffffffff, 0x00000100,
1021         0xad80, 0xffffffff, 0x00000100,
1022         0xac54, 0xffffffff, 0x00000100,
1023         0x897c, 0xffffffff, 0x06000100,
1024         0x9868, 0xffffffff, 0x00000100,
1025         0x9510, 0xffffffff, 0x00000100,
1026         0xaf04, 0xffffffff, 0x00000100,
1027         0xae04, 0xffffffff, 0x00000100,
1028         0x949c, 0xffffffff, 0x00000100,
1029         0x802c, 0xffffffff, 0xe0000000,
1030         0x9160, 0xffffffff, 0x00010000,
1031         0x9164, 0xffffffff, 0x00030002,
1032         0x9168, 0xffffffff, 0x00040007,
1033         0x916c, 0xffffffff, 0x00060005,
1034         0x9170, 0xffffffff, 0x00090008,
1035         0x9174, 0xffffffff, 0x00020001,
1036         0x9178, 0xffffffff, 0x00040003,
1037         0x917c, 0xffffffff, 0x00000007,
1038         0x9180, 0xffffffff, 0x00060005,
1039         0x9184, 0xffffffff, 0x00090008,
1040         0x9188, 0xffffffff, 0x00030002,
1041         0x918c, 0xffffffff, 0x00050004,
1042         0x9190, 0xffffffff, 0x00000008,
1043         0x9194, 0xffffffff, 0x00070006,
1044         0x9198, 0xffffffff, 0x000a0009,
1045         0x919c, 0xffffffff, 0x00040003,
1046         0x91a0, 0xffffffff, 0x00060005,
1047         0x91a4, 0xffffffff, 0x00000009,
1048         0x91a8, 0xffffffff, 0x00080007,
1049         0x91ac, 0xffffffff, 0x000b000a,
1050         0x91b0, 0xffffffff, 0x00050004,
1051         0x91b4, 0xffffffff, 0x00070006,
1052         0x91b8, 0xffffffff, 0x0008000b,
1053         0x91bc, 0xffffffff, 0x000a0009,
1054         0x91c0, 0xffffffff, 0x000d000c,
1055         0x91c4, 0xffffffff, 0x00060005,
1056         0x91c8, 0xffffffff, 0x00080007,
1057         0x91cc, 0xffffffff, 0x0000000b,
1058         0x91d0, 0xffffffff, 0x000a0009,
1059         0x91d4, 0xffffffff, 0x000d000c,
1060         0x9150, 0xffffffff, 0x96940200,
1061         0x8708, 0xffffffff, 0x00900100,
1062         0xc478, 0xffffffff, 0x00000080,
1063         0xc404, 0xffffffff, 0x0020003f,
1064         0x30, 0xffffffff, 0x0000001c,
1065         0x34, 0x000f0000, 0x000f0000,
1066         0x160c, 0xffffffff, 0x00000100,
1067         0x1024, 0xffffffff, 0x00000100,
1068         0x20a8, 0xffffffff, 0x00000104,
1069         0x264c, 0x000c0000, 0x000c0000,
1070         0x2648, 0x000c0000, 0x000c0000,
1071         0x2f50, 0x00000001, 0x00000001,
1072         0x30cc, 0xc0000fff, 0x00000104,
1073         0xc1e4, 0x00000001, 0x00000001,
1074         0xd0c0, 0xfffffff0, 0x00000100,
1075         0xd8c0, 0xfffffff0, 0x00000100
1076 };
1077
1078 static u32 verde_pg_init[] =
1079 {
1080         0x353c, 0xffffffff, 0x40000,
1081         0x3538, 0xffffffff, 0x200010ff,
1082         0x353c, 0xffffffff, 0x0,
1083         0x353c, 0xffffffff, 0x0,
1084         0x353c, 0xffffffff, 0x0,
1085         0x353c, 0xffffffff, 0x0,
1086         0x353c, 0xffffffff, 0x0,
1087         0x353c, 0xffffffff, 0x7007,
1088         0x3538, 0xffffffff, 0x300010ff,
1089         0x353c, 0xffffffff, 0x0,
1090         0x353c, 0xffffffff, 0x0,
1091         0x353c, 0xffffffff, 0x0,
1092         0x353c, 0xffffffff, 0x0,
1093         0x353c, 0xffffffff, 0x0,
1094         0x353c, 0xffffffff, 0x400000,
1095         0x3538, 0xffffffff, 0x100010ff,
1096         0x353c, 0xffffffff, 0x0,
1097         0x353c, 0xffffffff, 0x0,
1098         0x353c, 0xffffffff, 0x0,
1099         0x353c, 0xffffffff, 0x0,
1100         0x353c, 0xffffffff, 0x0,
1101         0x353c, 0xffffffff, 0x120200,
1102         0x3538, 0xffffffff, 0x500010ff,
1103         0x353c, 0xffffffff, 0x0,
1104         0x353c, 0xffffffff, 0x0,
1105         0x353c, 0xffffffff, 0x0,
1106         0x353c, 0xffffffff, 0x0,
1107         0x353c, 0xffffffff, 0x0,
1108         0x353c, 0xffffffff, 0x1e1e16,
1109         0x3538, 0xffffffff, 0x600010ff,
1110         0x353c, 0xffffffff, 0x0,
1111         0x353c, 0xffffffff, 0x0,
1112         0x353c, 0xffffffff, 0x0,
1113         0x353c, 0xffffffff, 0x0,
1114         0x353c, 0xffffffff, 0x0,
1115         0x353c, 0xffffffff, 0x171f1e,
1116         0x3538, 0xffffffff, 0x700010ff,
1117         0x353c, 0xffffffff, 0x0,
1118         0x353c, 0xffffffff, 0x0,
1119         0x353c, 0xffffffff, 0x0,
1120         0x353c, 0xffffffff, 0x0,
1121         0x353c, 0xffffffff, 0x0,
1122         0x353c, 0xffffffff, 0x0,
1123         0x3538, 0xffffffff, 0x9ff,
1124         0x3500, 0xffffffff, 0x0,
1125         0x3504, 0xffffffff, 0x10000800,
1126         0x3504, 0xffffffff, 0xf,
1127         0x3504, 0xffffffff, 0xf,
1128         0x3500, 0xffffffff, 0x4,
1129         0x3504, 0xffffffff, 0x1000051e,
1130         0x3504, 0xffffffff, 0xffff,
1131         0x3504, 0xffffffff, 0xffff,
1132         0x3500, 0xffffffff, 0x8,
1133         0x3504, 0xffffffff, 0x80500,
1134         0x3500, 0xffffffff, 0x12,
1135         0x3504, 0xffffffff, 0x9050c,
1136         0x3500, 0xffffffff, 0x1d,
1137         0x3504, 0xffffffff, 0xb052c,
1138         0x3500, 0xffffffff, 0x2a,
1139         0x3504, 0xffffffff, 0x1053e,
1140         0x3500, 0xffffffff, 0x2d,
1141         0x3504, 0xffffffff, 0x10546,
1142         0x3500, 0xffffffff, 0x30,
1143         0x3504, 0xffffffff, 0xa054e,
1144         0x3500, 0xffffffff, 0x3c,
1145         0x3504, 0xffffffff, 0x1055f,
1146         0x3500, 0xffffffff, 0x3f,
1147         0x3504, 0xffffffff, 0x10567,
1148         0x3500, 0xffffffff, 0x42,
1149         0x3504, 0xffffffff, 0x1056f,
1150         0x3500, 0xffffffff, 0x45,
1151         0x3504, 0xffffffff, 0x10572,
1152         0x3500, 0xffffffff, 0x48,
1153         0x3504, 0xffffffff, 0x20575,
1154         0x3500, 0xffffffff, 0x4c,
1155         0x3504, 0xffffffff, 0x190801,
1156         0x3500, 0xffffffff, 0x67,
1157         0x3504, 0xffffffff, 0x1082a,
1158         0x3500, 0xffffffff, 0x6a,
1159         0x3504, 0xffffffff, 0x1b082d,
1160         0x3500, 0xffffffff, 0x87,
1161         0x3504, 0xffffffff, 0x310851,
1162         0x3500, 0xffffffff, 0xba,
1163         0x3504, 0xffffffff, 0x891,
1164         0x3500, 0xffffffff, 0xbc,
1165         0x3504, 0xffffffff, 0x893,
1166         0x3500, 0xffffffff, 0xbe,
1167         0x3504, 0xffffffff, 0x20895,
1168         0x3500, 0xffffffff, 0xc2,
1169         0x3504, 0xffffffff, 0x20899,
1170         0x3500, 0xffffffff, 0xc6,
1171         0x3504, 0xffffffff, 0x2089d,
1172         0x3500, 0xffffffff, 0xca,
1173         0x3504, 0xffffffff, 0x8a1,
1174         0x3500, 0xffffffff, 0xcc,
1175         0x3504, 0xffffffff, 0x8a3,
1176         0x3500, 0xffffffff, 0xce,
1177         0x3504, 0xffffffff, 0x308a5,
1178         0x3500, 0xffffffff, 0xd3,
1179         0x3504, 0xffffffff, 0x6d08cd,
1180         0x3500, 0xffffffff, 0x142,
1181         0x3504, 0xffffffff, 0x2000095a,
1182         0x3504, 0xffffffff, 0x1,
1183         0x3500, 0xffffffff, 0x144,
1184         0x3504, 0xffffffff, 0x301f095b,
1185         0x3500, 0xffffffff, 0x165,
1186         0x3504, 0xffffffff, 0xc094d,
1187         0x3500, 0xffffffff, 0x173,
1188         0x3504, 0xffffffff, 0xf096d,
1189         0x3500, 0xffffffff, 0x184,
1190         0x3504, 0xffffffff, 0x15097f,
1191         0x3500, 0xffffffff, 0x19b,
1192         0x3504, 0xffffffff, 0xc0998,
1193         0x3500, 0xffffffff, 0x1a9,
1194         0x3504, 0xffffffff, 0x409a7,
1195         0x3500, 0xffffffff, 0x1af,
1196         0x3504, 0xffffffff, 0xcdc,
1197         0x3500, 0xffffffff, 0x1b1,
1198         0x3504, 0xffffffff, 0x800,
1199         0x3508, 0xffffffff, 0x6c9b2000,
1200         0x3510, 0xfc00, 0x2000,
1201         0x3544, 0xffffffff, 0xfc0,
1202         0x28d4, 0x00000100, 0x100
1203 };
1204
1205 static void si_init_golden_registers(struct radeon_device *rdev)
1206 {
1207         switch (rdev->family) {
1208         case CHIP_TAHITI:
1209                 radeon_program_register_sequence(rdev,
1210                                                  tahiti_golden_registers,
1211                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
1212                 radeon_program_register_sequence(rdev,
1213                                                  tahiti_golden_rlc_registers,
1214                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1215                 radeon_program_register_sequence(rdev,
1216                                                  tahiti_mgcg_cgcg_init,
1217                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1218                 radeon_program_register_sequence(rdev,
1219                                                  tahiti_golden_registers2,
1220                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1221                 break;
1222         case CHIP_PITCAIRN:
1223                 radeon_program_register_sequence(rdev,
1224                                                  pitcairn_golden_registers,
1225                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1226                 radeon_program_register_sequence(rdev,
1227                                                  pitcairn_golden_rlc_registers,
1228                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1229                 radeon_program_register_sequence(rdev,
1230                                                  pitcairn_mgcg_cgcg_init,
1231                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1232                 break;
1233         case CHIP_VERDE:
1234                 radeon_program_register_sequence(rdev,
1235                                                  verde_golden_registers,
1236                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
1237                 radeon_program_register_sequence(rdev,
1238                                                  verde_golden_rlc_registers,
1239                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1240                 radeon_program_register_sequence(rdev,
1241                                                  verde_mgcg_cgcg_init,
1242                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1243                 radeon_program_register_sequence(rdev,
1244                                                  verde_pg_init,
1245                                                  (const u32)ARRAY_SIZE(verde_pg_init));
1246                 break;
1247         case CHIP_OLAND:
1248                 radeon_program_register_sequence(rdev,
1249                                                  oland_golden_registers,
1250                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
1251                 radeon_program_register_sequence(rdev,
1252                                                  oland_golden_rlc_registers,
1253                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1254                 radeon_program_register_sequence(rdev,
1255                                                  oland_mgcg_cgcg_init,
1256                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1257                 break;
1258         case CHIP_HAINAN:
1259                 radeon_program_register_sequence(rdev,
1260                                                  hainan_golden_registers,
1261                                                  (const u32)ARRAY_SIZE(hainan_golden_registers));
1262                 radeon_program_register_sequence(rdev,
1263                                                  hainan_golden_registers2,
1264                                                  (const u32)ARRAY_SIZE(hainan_golden_registers2));
1265                 radeon_program_register_sequence(rdev,
1266                                                  hainan_mgcg_cgcg_init,
1267                                                  (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1268                 break;
1269         default:
1270                 break;
1271         }
1272 }
1273
1274 /**
1275  * si_get_allowed_info_register - fetch the register for the info ioctl
1276  *
1277  * @rdev: radeon_device pointer
1278  * @reg: register offset in bytes
1279  * @val: register value
1280  *
1281  * Returns 0 for success or -EINVAL for an invalid register
1282  *
1283  */
1284 int si_get_allowed_info_register(struct radeon_device *rdev,
1285                                  u32 reg, u32 *val)
1286 {
1287         switch (reg) {
1288         case GRBM_STATUS:
1289         case GRBM_STATUS2:
1290         case GRBM_STATUS_SE0:
1291         case GRBM_STATUS_SE1:
1292         case SRBM_STATUS:
1293         case SRBM_STATUS2:
1294         case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
1295         case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
1296         case UVD_STATUS:
1297                 *val = RREG32(reg);
1298                 return 0;
1299         default:
1300                 return -EINVAL;
1301         }
1302 }
1303
1304 #define PCIE_BUS_CLK                10000
1305 #define TCLK                        (PCIE_BUS_CLK / 10)
1306
1307 /**
1308  * si_get_xclk - get the xclk
1309  *
1310  * @rdev: radeon_device pointer
1311  *
1312  * Returns the reference clock used by the gfx engine
1313  * (SI).
1314  */
1315 u32 si_get_xclk(struct radeon_device *rdev)
1316 {
1317         u32 reference_clock = rdev->clock.spll.reference_freq;
1318         u32 tmp;
1319
1320         tmp = RREG32(CG_CLKPIN_CNTL_2);
1321         if (tmp & MUX_TCLK_TO_XCLK)
1322                 return TCLK;
1323
1324         tmp = RREG32(CG_CLKPIN_CNTL);
1325         if (tmp & XTALIN_DIVIDE)
1326                 return reference_clock / 4;
1327
1328         return reference_clock;
1329 }
1330
1331 /* get temperature in millidegrees */
1332 int si_get_temp(struct radeon_device *rdev)
1333 {
1334         u32 temp;
1335         int actual_temp = 0;
1336
1337         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1338                 CTF_TEMP_SHIFT;
1339
1340         if (temp & 0x200)
1341                 actual_temp = 255;
1342         else
1343                 actual_temp = temp & 0x1ff;
1344
1345         actual_temp = (actual_temp * 1000);
1346
1347         return actual_temp;
1348 }
1349
/*
 * Number of {index, data} rows in each of the *_io_mc_regs tables below;
 * si_mc_load_microcode() also uses it as the loop bound (regs_size) when it
 * programs one of those tables.
 */
1350 #define TAHITI_IO_MC_REGS_SIZE 36
1351
/*
 * MC IO register table selected by si_mc_load_microcode() for CHIP_TAHITI
 * when rdev->new_fw is not set.  For every row the first word is written to
 * MC_SEQ_IO_DEBUG_INDEX and the second to MC_SEQ_IO_DEBUG_DATA.
 *
 * The final row (index 0x0000009f) holds 0x00a77400; the sibling
 * *_io_mc_regs tables carry a different value in that row.
 */
1352 static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1353         {0x0000006f, 0x03044000},
1354         {0x00000070, 0x0480c018},
1355         {0x00000071, 0x00000040},
1356         {0x00000072, 0x01000000},
1357         {0x00000074, 0x000000ff},
1358         {0x00000075, 0x00143400},
1359         {0x00000076, 0x08ec0800},
1360         {0x00000077, 0x040000cc},
1361         {0x00000079, 0x00000000},
1362         {0x0000007a, 0x21000409},
1363         {0x0000007c, 0x00000000},
1364         {0x0000007d, 0xe8000000},
1365         {0x0000007e, 0x044408a8},
1366         {0x0000007f, 0x00000003},
1367         {0x00000080, 0x00000000},
1368         {0x00000081, 0x01000000},
1369         {0x00000082, 0x02000000},
1370         {0x00000083, 0x00000000},
1371         {0x00000084, 0xe3f3e4f4},
1372         {0x00000085, 0x00052024},
1373         {0x00000087, 0x00000000},
1374         {0x00000088, 0x66036603},
1375         {0x00000089, 0x01000000},
1376         {0x0000008b, 0x1c0a0000},
1377         {0x0000008c, 0xff010000},
1378         {0x0000008e, 0xffffefff},
1379         {0x0000008f, 0xfff3efff},
1380         {0x00000090, 0xfff3efbf},
1381         {0x00000094, 0x00101101},
1382         {0x00000095, 0x00000fff},
1383         {0x00000096, 0x00116fff},
1384         {0x00000097, 0x60010000},
1385         {0x00000098, 0x10010000},
1386         {0x00000099, 0x00006000},
1387         {0x0000009a, 0x00001000},
1388         {0x0000009f, 0x00a77400}
1389 };
1390
/*
 * MC IO register table selected by si_mc_load_microcode() for CHIP_PITCAIRN
 * when rdev->new_fw is not set.  For every row the first word is written to
 * MC_SEQ_IO_DEBUG_INDEX and the second to MC_SEQ_IO_DEBUG_DATA.
 *
 * The final row (index 0x0000009f) holds 0x00a47400; the sibling
 * *_io_mc_regs tables carry a different value in that row.
 */
1391 static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1392         {0x0000006f, 0x03044000},
1393         {0x00000070, 0x0480c018},
1394         {0x00000071, 0x00000040},
1395         {0x00000072, 0x01000000},
1396         {0x00000074, 0x000000ff},
1397         {0x00000075, 0x00143400},
1398         {0x00000076, 0x08ec0800},
1399         {0x00000077, 0x040000cc},
1400         {0x00000079, 0x00000000},
1401         {0x0000007a, 0x21000409},
1402         {0x0000007c, 0x00000000},
1403         {0x0000007d, 0xe8000000},
1404         {0x0000007e, 0x044408a8},
1405         {0x0000007f, 0x00000003},
1406         {0x00000080, 0x00000000},
1407         {0x00000081, 0x01000000},
1408         {0x00000082, 0x02000000},
1409         {0x00000083, 0x00000000},
1410         {0x00000084, 0xe3f3e4f4},
1411         {0x00000085, 0x00052024},
1412         {0x00000087, 0x00000000},
1413         {0x00000088, 0x66036603},
1414         {0x00000089, 0x01000000},
1415         {0x0000008b, 0x1c0a0000},
1416         {0x0000008c, 0xff010000},
1417         {0x0000008e, 0xffffefff},
1418         {0x0000008f, 0xfff3efff},
1419         {0x00000090, 0xfff3efbf},
1420         {0x00000094, 0x00101101},
1421         {0x00000095, 0x00000fff},
1422         {0x00000096, 0x00116fff},
1423         {0x00000097, 0x60010000},
1424         {0x00000098, 0x10010000},
1425         {0x00000099, 0x00006000},
1426         {0x0000009a, 0x00001000},
1427         {0x0000009f, 0x00a47400}
1428 };
1429
/*
 * MC IO register table selected by si_mc_load_microcode() for CHIP_VERDE,
 * and for any family without its own case (the default label shares this
 * branch), when rdev->new_fw is not set.  For every row the first word is
 * written to MC_SEQ_IO_DEBUG_INDEX and the second to MC_SEQ_IO_DEBUG_DATA.
 *
 * The final row (index 0x0000009f) holds 0x00a37400; the sibling
 * *_io_mc_regs tables carry a different value in that row.
 */
1430 static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1431         {0x0000006f, 0x03044000},
1432         {0x00000070, 0x0480c018},
1433         {0x00000071, 0x00000040},
1434         {0x00000072, 0x01000000},
1435         {0x00000074, 0x000000ff},
1436         {0x00000075, 0x00143400},
1437         {0x00000076, 0x08ec0800},
1438         {0x00000077, 0x040000cc},
1439         {0x00000079, 0x00000000},
1440         {0x0000007a, 0x21000409},
1441         {0x0000007c, 0x00000000},
1442         {0x0000007d, 0xe8000000},
1443         {0x0000007e, 0x044408a8},
1444         {0x0000007f, 0x00000003},
1445         {0x00000080, 0x00000000},
1446         {0x00000081, 0x01000000},
1447         {0x00000082, 0x02000000},
1448         {0x00000083, 0x00000000},
1449         {0x00000084, 0xe3f3e4f4},
1450         {0x00000085, 0x00052024},
1451         {0x00000087, 0x00000000},
1452         {0x00000088, 0x66036603},
1453         {0x00000089, 0x01000000},
1454         {0x0000008b, 0x1c0a0000},
1455         {0x0000008c, 0xff010000},
1456         {0x0000008e, 0xffffefff},
1457         {0x0000008f, 0xfff3efff},
1458         {0x00000090, 0xfff3efbf},
1459         {0x00000094, 0x00101101},
1460         {0x00000095, 0x00000fff},
1461         {0x00000096, 0x00116fff},
1462         {0x00000097, 0x60010000},
1463         {0x00000098, 0x10010000},
1464         {0x00000099, 0x00006000},
1465         {0x0000009a, 0x00001000},
1466         {0x0000009f, 0x00a37400}
1467 };
1468
/*
 * MC IO register table selected by si_mc_load_microcode() for CHIP_OLAND
 * when rdev->new_fw is not set.  For every row the first word is written to
 * MC_SEQ_IO_DEBUG_INDEX and the second to MC_SEQ_IO_DEBUG_DATA.
 *
 * The final row (index 0x0000009f) holds 0x00a17730; the sibling
 * *_io_mc_regs tables carry a different value in that row.
 */
1469 static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1470         {0x0000006f, 0x03044000},
1471         {0x00000070, 0x0480c018},
1472         {0x00000071, 0x00000040},
1473         {0x00000072, 0x01000000},
1474         {0x00000074, 0x000000ff},
1475         {0x00000075, 0x00143400},
1476         {0x00000076, 0x08ec0800},
1477         {0x00000077, 0x040000cc},
1478         {0x00000079, 0x00000000},
1479         {0x0000007a, 0x21000409},
1480         {0x0000007c, 0x00000000},
1481         {0x0000007d, 0xe8000000},
1482         {0x0000007e, 0x044408a8},
1483         {0x0000007f, 0x00000003},
1484         {0x00000080, 0x00000000},
1485         {0x00000081, 0x01000000},
1486         {0x00000082, 0x02000000},
1487         {0x00000083, 0x00000000},
1488         {0x00000084, 0xe3f3e4f4},
1489         {0x00000085, 0x00052024},
1490         {0x00000087, 0x00000000},
1491         {0x00000088, 0x66036603},
1492         {0x00000089, 0x01000000},
1493         {0x0000008b, 0x1c0a0000},
1494         {0x0000008c, 0xff010000},
1495         {0x0000008e, 0xffffefff},
1496         {0x0000008f, 0xfff3efff},
1497         {0x00000090, 0xfff3efbf},
1498         {0x00000094, 0x00101101},
1499         {0x00000095, 0x00000fff},
1500         {0x00000096, 0x00116fff},
1501         {0x00000097, 0x60010000},
1502         {0x00000098, 0x10010000},
1503         {0x00000099, 0x00006000},
1504         {0x0000009a, 0x00001000},
1505         {0x0000009f, 0x00a17730}
1506 };
1507
/*
 * MC IO register table selected by si_mc_load_microcode() for CHIP_HAINAN
 * when rdev->new_fw is not set.  For every row the first word is written to
 * MC_SEQ_IO_DEBUG_INDEX and the second to MC_SEQ_IO_DEBUG_DATA.
 *
 * The final row (index 0x0000009f) holds 0x00a07730; the sibling
 * *_io_mc_regs tables carry a different value in that row.
 */
1508 static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1509         {0x0000006f, 0x03044000},
1510         {0x00000070, 0x0480c018},
1511         {0x00000071, 0x00000040},
1512         {0x00000072, 0x01000000},
1513         {0x00000074, 0x000000ff},
1514         {0x00000075, 0x00143400},
1515         {0x00000076, 0x08ec0800},
1516         {0x00000077, 0x040000cc},
1517         {0x00000079, 0x00000000},
1518         {0x0000007a, 0x21000409},
1519         {0x0000007c, 0x00000000},
1520         {0x0000007d, 0xe8000000},
1521         {0x0000007e, 0x044408a8},
1522         {0x0000007f, 0x00000003},
1523         {0x00000080, 0x00000000},
1524         {0x00000081, 0x01000000},
1525         {0x00000082, 0x02000000},
1526         {0x00000083, 0x00000000},
1527         {0x00000084, 0xe3f3e4f4},
1528         {0x00000085, 0x00052024},
1529         {0x00000087, 0x00000000},
1530         {0x00000088, 0x66036603},
1531         {0x00000089, 0x01000000},
1532         {0x0000008b, 0x1c0a0000},
1533         {0x0000008c, 0xff010000},
1534         {0x0000008e, 0xffffefff},
1535         {0x0000008f, 0xfff3efff},
1536         {0x00000090, 0xfff3efbf},
1537         {0x00000094, 0x00101101},
1538         {0x00000095, 0x00000fff},
1539         {0x00000096, 0x00116fff},
1540         {0x00000097, 0x60010000},
1541         {0x00000098, 0x10010000},
1542         {0x00000099, 0x00006000},
1543         {0x0000009a, 0x00001000},
1544         {0x0000009f, 0x00a07730}
1545 };
1546
1547 /* ucode loading */
1548 int si_mc_load_microcode(struct radeon_device *rdev)
1549 {
1550         const __be32 *fw_data = NULL;
1551         const __le32 *new_fw_data = NULL;
1552         u32 running;
1553         u32 *io_mc_regs = NULL;
1554         const __le32 *new_io_mc_regs = NULL;
1555         int i, regs_size, ucode_size;
1556
1557         if (!rdev->mc_fw)
1558                 return -EINVAL;
1559
1560         if (rdev->new_fw) {
1561                 const struct mc_firmware_header_v1_0 *hdr =
1562                         (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1563
1564                 radeon_ucode_print_mc_hdr(&hdr->header);
1565                 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1566                 new_io_mc_regs = (const __le32 *)
1567                         (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1568                 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1569                 new_fw_data = (const __le32 *)
1570                         (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1571         } else {
1572                 ucode_size = rdev->mc_fw->size / 4;
1573
1574                 switch (rdev->family) {
1575                 case CHIP_TAHITI:
1576                         io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1577                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1578                         break;
1579                 case CHIP_PITCAIRN:
1580                         io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1581                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1582                         break;
1583                 case CHIP_VERDE:
1584                 default:
1585                         io_mc_regs = (u32 *)&verde_io_mc_regs;
1586                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1587                         break;
1588                 case CHIP_OLAND:
1589                         io_mc_regs = (u32 *)&oland_io_mc_regs;
1590                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1591                         break;
1592                 case CHIP_HAINAN:
1593                         io_mc_regs = (u32 *)&hainan_io_mc_regs;
1594                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1595                         break;
1596                 }
1597                 fw_data = (const __be32 *)rdev->mc_fw->data;
1598         }
1599
1600         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1601
1602         if (running == 0) {
1603                 /* reset the engine and set to writable */
1604                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1605                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1606
1607                 /* load mc io regs */
1608                 for (i = 0; i < regs_size; i++) {
1609                         if (rdev->new_fw) {
1610                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1611                                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1612                         } else {
1613                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1614                                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1615                         }
1616                 }
1617                 /* load the MC ucode */
1618                 for (i = 0; i < ucode_size; i++) {
1619                         if (rdev->new_fw)
1620                                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1621                         else
1622                                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1623                 }
1624
1625                 /* put the engine back into the active state */
1626                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1627                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1628                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1629
1630                 /* wait for training to complete */
1631                 for (i = 0; i < rdev->usec_timeout; i++) {
1632                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1633                                 break;
1634                         udelay(1);
1635                 }
1636                 for (i = 0; i < rdev->usec_timeout; i++) {
1637                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1638                                 break;
1639                         udelay(1);
1640                 }
1641         }
1642
1643         return 0;
1644 }
1645
1646 static int si_init_microcode(struct radeon_device *rdev)
1647 {
1648         const char *chip_name;
1649         const char *new_chip_name;
1650         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1651         size_t smc_req_size, mc2_req_size;
1652         char fw_name[30];
1653         int err;
1654         int new_fw = 0;
1655         bool new_smc = false;
1656         bool si58_fw = false;
1657         bool banks2_fw = false;
1658
1659         DRM_DEBUG("\n");
1660
1661         switch (rdev->family) {
1662         case CHIP_TAHITI:
1663                 chip_name = "TAHITI";
1664                 new_chip_name = "tahiti";
1665                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1666                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1667                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1668                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1669                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1670                 mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1671                 smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1672                 break;
1673         case CHIP_PITCAIRN:
1674                 chip_name = "PITCAIRN";
1675                 if ((rdev->pdev->revision == 0x81) &&
1676                     ((rdev->pdev->device == 0x6810) ||
1677                      (rdev->pdev->device == 0x6811)))
1678                         new_smc = true;
1679                 new_chip_name = "pitcairn";
1680                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1681                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1682                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1683                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1684                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1685                 mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1686                 smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1687                 break;
1688         case CHIP_VERDE:
1689                 chip_name = "VERDE";
1690                 if (((rdev->pdev->device == 0x6820) &&
1691                      ((rdev->pdev->revision == 0x81) ||
1692                       (rdev->pdev->revision == 0x83))) ||
1693                     ((rdev->pdev->device == 0x6821) &&
1694                      ((rdev->pdev->revision == 0x83) ||
1695                       (rdev->pdev->revision == 0x87))) ||
1696                     ((rdev->pdev->revision == 0x87) &&
1697                      ((rdev->pdev->device == 0x6823) ||
1698                       (rdev->pdev->device == 0x682b))))
1699                         new_smc = true;
1700                 new_chip_name = "verde";
1701                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1702                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1703                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1704                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1705                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1706                 mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1707                 smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1708                 break;
1709         case CHIP_OLAND:
1710                 chip_name = "OLAND";
1711                 if (((rdev->pdev->revision == 0x81) &&
1712                      ((rdev->pdev->device == 0x6600) ||
1713                       (rdev->pdev->device == 0x6604) ||
1714                       (rdev->pdev->device == 0x6605) ||
1715                       (rdev->pdev->device == 0x6610))) ||
1716                     ((rdev->pdev->revision == 0x83) &&
1717                      (rdev->pdev->device == 0x6610)))
1718                         new_smc = true;
1719                 new_chip_name = "oland";
1720                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1721                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1722                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1723                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1724                 mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1725                 smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1726                 break;
1727         case CHIP_HAINAN:
1728                 chip_name = "HAINAN";
1729                 if (((rdev->pdev->revision == 0x81) &&
1730                      (rdev->pdev->device == 0x6660)) ||
1731                     ((rdev->pdev->revision == 0x83) &&
1732                      ((rdev->pdev->device == 0x6660) ||
1733                       (rdev->pdev->device == 0x6663) ||
1734                       (rdev->pdev->device == 0x6665) ||
1735                       (rdev->pdev->device == 0x6667))))
1736                         new_smc = true;
1737                 else if ((rdev->pdev->revision == 0xc3) &&
1738                          (rdev->pdev->device == 0x6665))
1739                         banks2_fw = true;
1740                 new_chip_name = "hainan";
1741                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1742                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1743                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1744                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1745                 mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1746                 smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1747                 break;
1748         default: BUG();
1749         }
1750
1751         /* this memory configuration requires special firmware */
1752         if (((RREG32(MC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58)
1753                 si58_fw = true;
1754
1755         DRM_INFO("Loading %s Microcode\n", new_chip_name);
1756
1757         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1758         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1759         if (err) {
1760                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1761                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1762                 if (err)
1763                         goto out;
1764                 if (rdev->pfp_fw->size != pfp_req_size) {
1765                         pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1766                                rdev->pfp_fw->size, fw_name);
1767                         err = -EINVAL;
1768                         goto out;
1769                 }
1770         } else {
1771                 err = radeon_ucode_validate(rdev->pfp_fw);
1772                 if (err) {
1773                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1774                                fw_name);
1775                         goto out;
1776                 } else {
1777                         new_fw++;
1778                 }
1779         }
1780
1781         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
1782         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1783         if (err) {
1784                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1785                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1786                 if (err)
1787                         goto out;
1788                 if (rdev->me_fw->size != me_req_size) {
1789                         pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1790                                rdev->me_fw->size, fw_name);
1791                         err = -EINVAL;
1792                 }
1793         } else {
1794                 err = radeon_ucode_validate(rdev->me_fw);
1795                 if (err) {
1796                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1797                                fw_name);
1798                         goto out;
1799                 } else {
1800                         new_fw++;
1801                 }
1802         }
1803
1804         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
1805         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1806         if (err) {
1807                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1808                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1809                 if (err)
1810                         goto out;
1811                 if (rdev->ce_fw->size != ce_req_size) {
1812                         pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1813                                rdev->ce_fw->size, fw_name);
1814                         err = -EINVAL;
1815                 }
1816         } else {
1817                 err = radeon_ucode_validate(rdev->ce_fw);
1818                 if (err) {
1819                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1820                                fw_name);
1821                         goto out;
1822                 } else {
1823                         new_fw++;
1824                 }
1825         }
1826
1827         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
1828         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1829         if (err) {
1830                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1831                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1832                 if (err)
1833                         goto out;
1834                 if (rdev->rlc_fw->size != rlc_req_size) {
1835                         pr_err("si_rlc: Bogus length %zu in firmware \"%s\"\n",
1836                                rdev->rlc_fw->size, fw_name);
1837                         err = -EINVAL;
1838                 }
1839         } else {
1840                 err = radeon_ucode_validate(rdev->rlc_fw);
1841                 if (err) {
1842                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1843                                fw_name);
1844                         goto out;
1845                 } else {
1846                         new_fw++;
1847                 }
1848         }
1849
1850         if (si58_fw)
1851                 snprintf(fw_name, sizeof(fw_name), "radeon/si58_mc.bin");
1852         else
1853                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
1854         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1855         if (err) {
1856                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1857                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1858                 if (err) {
1859                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1860                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1861                         if (err)
1862                                 goto out;
1863                 }
1864                 if ((rdev->mc_fw->size != mc_req_size) &&
1865                     (rdev->mc_fw->size != mc2_req_size)) {
1866                         pr_err("si_mc: Bogus length %zu in firmware \"%s\"\n",
1867                                rdev->mc_fw->size, fw_name);
1868                         err = -EINVAL;
1869                 }
1870                 DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1871         } else {
1872                 err = radeon_ucode_validate(rdev->mc_fw);
1873                 if (err) {
1874                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1875                                fw_name);
1876                         goto out;
1877                 } else {
1878                         new_fw++;
1879                 }
1880         }
1881
1882         if (banks2_fw)
1883                 snprintf(fw_name, sizeof(fw_name), "radeon/banks_k_2_smc.bin");
1884         else if (new_smc)
1885                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
1886         else
1887                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
1888         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1889         if (err) {
1890                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1891                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1892                 if (err) {
1893                         pr_err("smc: error loading firmware \"%s\"\n", fw_name);
1894                         release_firmware(rdev->smc_fw);
1895                         rdev->smc_fw = NULL;
1896                         err = 0;
1897                 } else if (rdev->smc_fw->size != smc_req_size) {
1898                         pr_err("si_smc: Bogus length %zu in firmware \"%s\"\n",
1899                                rdev->smc_fw->size, fw_name);
1900                         err = -EINVAL;
1901                 }
1902         } else {
1903                 err = radeon_ucode_validate(rdev->smc_fw);
1904                 if (err) {
1905                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1906                                fw_name);
1907                         goto out;
1908                 } else {
1909                         new_fw++;
1910                 }
1911         }
1912
1913         if (new_fw == 0) {
1914                 rdev->new_fw = false;
1915         } else if (new_fw < 6) {
1916                 pr_err("si_fw: mixing new and old firmware!\n");
1917                 err = -EINVAL;
1918         } else {
1919                 rdev->new_fw = true;
1920         }
1921 out:
1922         if (err) {
1923                 if (err != -EINVAL)
1924                         pr_err("si_cp: Failed to load firmware \"%s\"\n",
1925                                fw_name);
1926                 release_firmware(rdev->pfp_fw);
1927                 rdev->pfp_fw = NULL;
1928                 release_firmware(rdev->me_fw);
1929                 rdev->me_fw = NULL;
1930                 release_firmware(rdev->ce_fw);
1931                 rdev->ce_fw = NULL;
1932                 release_firmware(rdev->rlc_fw);
1933                 rdev->rlc_fw = NULL;
1934                 release_firmware(rdev->mc_fw);
1935                 rdev->mc_fw = NULL;
1936                 release_firmware(rdev->smc_fw);
1937                 rdev->smc_fw = NULL;
1938         }
1939         return err;
1940 }
1941
1942 /* watermark setup */
1943 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1944                                    struct radeon_crtc *radeon_crtc,
1945                                    struct drm_display_mode *mode,
1946                                    struct drm_display_mode *other_mode)
1947 {
1948         u32 tmp, buffer_alloc, i;
1949         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1950         /*
1951          * Line Buffer Setup
1952          * There are 3 line buffers, each one shared by 2 display controllers.
1953          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1954          * the display controllers.  The paritioning is done via one of four
1955          * preset allocations specified in bits 21:20:
1956          *  0 - half lb
1957          *  2 - whole lb, other crtc must be disabled
1958          */
1959         /* this can get tricky if we have two large displays on a paired group
1960          * of crtcs.  Ideally for multiple large displays we'd assign them to
1961          * non-linked crtcs for maximum line buffer allocation.
1962          */
1963         if (radeon_crtc->base.enabled && mode) {
1964                 if (other_mode) {
1965                         tmp = 0; /* 1/2 */
1966                         buffer_alloc = 1;
1967                 } else {
1968                         tmp = 2; /* whole */
1969                         buffer_alloc = 2;
1970                 }
1971         } else {
1972                 tmp = 0;
1973                 buffer_alloc = 0;
1974         }
1975
1976         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1977                DC_LB_MEMORY_CONFIG(tmp));
1978
1979         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1980                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1981         for (i = 0; i < rdev->usec_timeout; i++) {
1982                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1983                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
1984                         break;
1985                 udelay(1);
1986         }
1987
1988         if (radeon_crtc->base.enabled && mode) {
1989                 switch (tmp) {
1990                 case 0:
1991                 default:
1992                         return 4096 * 2;
1993                 case 2:
1994                         return 8192 * 2;
1995                 }
1996         }
1997
1998         /* controller not enabled, so no lb used */
1999         return 0;
2000 }
2001
2002 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
2003 {
2004         u32 tmp = RREG32(MC_SHARED_CHMAP);
2005
2006         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2007         case 0:
2008         default:
2009                 return 1;
2010         case 1:
2011                 return 2;
2012         case 2:
2013                 return 4;
2014         case 3:
2015                 return 8;
2016         case 4:
2017                 return 3;
2018         case 5:
2019                 return 6;
2020         case 6:
2021                 return 10;
2022         case 7:
2023                 return 12;
2024         case 8:
2025                 return 16;
2026         }
2027 }
2028
2029 struct dce6_wm_params {
2030         u32 dram_channels; /* number of dram channels */
2031         u32 yclk;          /* bandwidth per dram data pin in kHz */
2032         u32 sclk;          /* engine clock in kHz */
2033         u32 disp_clk;      /* display clock in kHz */
2034         u32 src_width;     /* viewport width */
2035         u32 active_time;   /* active display time in ns */
2036         u32 blank_time;    /* blank time in ns */
2037         bool interlaced;    /* mode is interlaced */
2038         fixed20_12 vsc;    /* vertical scale ratio */
2039         u32 num_heads;     /* number of active crtcs */
2040         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
2041         u32 lb_size;       /* line buffer allocated to pipe */
2042         u32 vtaps;         /* vertical scaler taps */
2043 };
2044
2045 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
2046 {
2047         /* Calculate raw DRAM Bandwidth */
2048         fixed20_12 dram_efficiency; /* 0.7 */
2049         fixed20_12 yclk, dram_channels, bandwidth;
2050         fixed20_12 a;
2051
2052         a.full = dfixed_const(1000);
2053         yclk.full = dfixed_const(wm->yclk);
2054         yclk.full = dfixed_div(yclk, a);
2055         dram_channels.full = dfixed_const(wm->dram_channels * 4);
2056         a.full = dfixed_const(10);
2057         dram_efficiency.full = dfixed_const(7);
2058         dram_efficiency.full = dfixed_div(dram_efficiency, a);
2059         bandwidth.full = dfixed_mul(dram_channels, yclk);
2060         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2061
2062         return dfixed_trunc(bandwidth);
2063 }
2064
2065 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2066 {
2067         /* Calculate DRAM Bandwidth and the part allocated to display. */
2068         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2069         fixed20_12 yclk, dram_channels, bandwidth;
2070         fixed20_12 a;
2071
2072         a.full = dfixed_const(1000);
2073         yclk.full = dfixed_const(wm->yclk);
2074         yclk.full = dfixed_div(yclk, a);
2075         dram_channels.full = dfixed_const(wm->dram_channels * 4);
2076         a.full = dfixed_const(10);
2077         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
2078         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2079         bandwidth.full = dfixed_mul(dram_channels, yclk);
2080         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2081
2082         return dfixed_trunc(bandwidth);
2083 }
2084
2085 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2086 {
2087         /* Calculate the display Data return Bandwidth */
2088         fixed20_12 return_efficiency; /* 0.8 */
2089         fixed20_12 sclk, bandwidth;
2090         fixed20_12 a;
2091
2092         a.full = dfixed_const(1000);
2093         sclk.full = dfixed_const(wm->sclk);
2094         sclk.full = dfixed_div(sclk, a);
2095         a.full = dfixed_const(10);
2096         return_efficiency.full = dfixed_const(8);
2097         return_efficiency.full = dfixed_div(return_efficiency, a);
2098         a.full = dfixed_const(32);
2099         bandwidth.full = dfixed_mul(a, sclk);
2100         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2101
2102         return dfixed_trunc(bandwidth);
2103 }
2104
2105 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2106 {
2107         return 32;
2108 }
2109
2110 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2111 {
2112         /* Calculate the DMIF Request Bandwidth */
2113         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2114         fixed20_12 disp_clk, sclk, bandwidth;
2115         fixed20_12 a, b1, b2;
2116         u32 min_bandwidth;
2117
2118         a.full = dfixed_const(1000);
2119         disp_clk.full = dfixed_const(wm->disp_clk);
2120         disp_clk.full = dfixed_div(disp_clk, a);
2121         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2122         b1.full = dfixed_mul(a, disp_clk);
2123
2124         a.full = dfixed_const(1000);
2125         sclk.full = dfixed_const(wm->sclk);
2126         sclk.full = dfixed_div(sclk, a);
2127         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2128         b2.full = dfixed_mul(a, sclk);
2129
2130         a.full = dfixed_const(10);
2131         disp_clk_request_efficiency.full = dfixed_const(8);
2132         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2133
2134         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2135
2136         a.full = dfixed_const(min_bandwidth);
2137         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2138
2139         return dfixed_trunc(bandwidth);
2140 }
2141
2142 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2143 {
2144         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
2145         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2146         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2147         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2148
2149         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2150 }
2151
2152 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2153 {
2154         /* Calculate the display mode Average Bandwidth
2155          * DisplayMode should contain the source and destination dimensions,
2156          * timing, etc.
2157          */
2158         fixed20_12 bpp;
2159         fixed20_12 line_time;
2160         fixed20_12 src_width;
2161         fixed20_12 bandwidth;
2162         fixed20_12 a;
2163
2164         a.full = dfixed_const(1000);
2165         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2166         line_time.full = dfixed_div(line_time, a);
2167         bpp.full = dfixed_const(wm->bytes_per_pixel);
2168         src_width.full = dfixed_const(wm->src_width);
2169         bandwidth.full = dfixed_mul(src_width, bpp);
2170         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2171         bandwidth.full = dfixed_div(bandwidth, line_time);
2172
2173         return dfixed_trunc(bandwidth);
2174 }
2175
2176 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2177 {
2178         /* First calcualte the latency in ns */
2179         u32 mc_latency = 2000; /* 2000 ns. */
2180         u32 available_bandwidth = dce6_available_bandwidth(wm);
2181         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2182         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2183         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2184         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2185                 (wm->num_heads * cursor_line_pair_return_time);
2186         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2187         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2188         u32 tmp, dmif_size = 12288;
2189         fixed20_12 a, b, c;
2190
2191         if (wm->num_heads == 0)
2192                 return 0;
2193
2194         a.full = dfixed_const(2);
2195         b.full = dfixed_const(1);
2196         if ((wm->vsc.full > a.full) ||
2197             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2198             (wm->vtaps >= 5) ||
2199             ((wm->vsc.full >= a.full) && wm->interlaced))
2200                 max_src_lines_per_dst_line = 4;
2201         else
2202                 max_src_lines_per_dst_line = 2;
2203
2204         a.full = dfixed_const(available_bandwidth);
2205         b.full = dfixed_const(wm->num_heads);
2206         a.full = dfixed_div(a, b);
2207
2208         b.full = dfixed_const(mc_latency + 512);
2209         c.full = dfixed_const(wm->disp_clk);
2210         b.full = dfixed_div(b, c);
2211
2212         c.full = dfixed_const(dmif_size);
2213         b.full = dfixed_div(c, b);
2214
2215         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
2216
2217         b.full = dfixed_const(1000);
2218         c.full = dfixed_const(wm->disp_clk);
2219         b.full = dfixed_div(c, b);
2220         c.full = dfixed_const(wm->bytes_per_pixel);
2221         b.full = dfixed_mul(b, c);
2222
2223         lb_fill_bw = min(tmp, dfixed_trunc(b));
2224
2225         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2226         b.full = dfixed_const(1000);
2227         c.full = dfixed_const(lb_fill_bw);
2228         b.full = dfixed_div(c, b);
2229         a.full = dfixed_div(a, b);
2230         line_fill_time = dfixed_trunc(a);
2231
2232         if (line_fill_time < wm->active_time)
2233                 return latency;
2234         else
2235                 return latency + (line_fill_time - wm->active_time);
2236
2237 }
2238
2239 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2240 {
2241         if (dce6_average_bandwidth(wm) <=
2242             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2243                 return true;
2244         else
2245                 return false;
2246 };
2247
2248 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2249 {
2250         if (dce6_average_bandwidth(wm) <=
2251             (dce6_available_bandwidth(wm) / wm->num_heads))
2252                 return true;
2253         else
2254                 return false;
2255 };
2256
2257 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2258 {
2259         u32 lb_partitions = wm->lb_size / wm->src_width;
2260         u32 line_time = wm->active_time + wm->blank_time;
2261         u32 latency_tolerant_lines;
2262         u32 latency_hiding;
2263         fixed20_12 a;
2264
2265         a.full = dfixed_const(1);
2266         if (wm->vsc.full > a.full)
2267                 latency_tolerant_lines = 1;
2268         else {
2269                 if (lb_partitions <= (wm->vtaps + 1))
2270                         latency_tolerant_lines = 1;
2271                 else
2272                         latency_tolerant_lines = 2;
2273         }
2274
2275         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2276
2277         if (dce6_latency_watermark(wm) <= latency_hiding)
2278                 return true;
2279         else
2280                 return false;
2281 }
2282
2283 static void dce6_program_watermarks(struct radeon_device *rdev,
2284                                          struct radeon_crtc *radeon_crtc,
2285                                          u32 lb_size, u32 num_heads)
2286 {
2287         struct drm_display_mode *mode = &radeon_crtc->base.mode;
2288         struct dce6_wm_params wm_low, wm_high;
2289         u32 dram_channels;
2290         u32 pixel_period;
2291         u32 line_time = 0;
2292         u32 latency_watermark_a = 0, latency_watermark_b = 0;
2293         u32 priority_a_mark = 0, priority_b_mark = 0;
2294         u32 priority_a_cnt = PRIORITY_OFF;
2295         u32 priority_b_cnt = PRIORITY_OFF;
2296         u32 tmp, arb_control3;
2297         fixed20_12 a, b, c;
2298
2299         if (radeon_crtc->base.enabled && num_heads && mode) {
2300                 pixel_period = 1000000 / (u32)mode->clock;
2301                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
2302                 priority_a_cnt = 0;
2303                 priority_b_cnt = 0;
2304
2305                 if (rdev->family == CHIP_ARUBA)
2306                         dram_channels = evergreen_get_number_of_dram_channels(rdev);
2307                 else
2308                         dram_channels = si_get_number_of_dram_channels(rdev);
2309
2310                 /* watermark for high clocks */
2311                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2312                         wm_high.yclk =
2313                                 radeon_dpm_get_mclk(rdev, false) * 10;
2314                         wm_high.sclk =
2315                                 radeon_dpm_get_sclk(rdev, false) * 10;
2316                 } else {
2317                         wm_high.yclk = rdev->pm.current_mclk * 10;
2318                         wm_high.sclk = rdev->pm.current_sclk * 10;
2319                 }
2320
2321                 wm_high.disp_clk = mode->clock;
2322                 wm_high.src_width = mode->crtc_hdisplay;
2323                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
2324                 wm_high.blank_time = line_time - wm_high.active_time;
2325                 wm_high.interlaced = false;
2326                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2327                         wm_high.interlaced = true;
2328                 wm_high.vsc = radeon_crtc->vsc;
2329                 wm_high.vtaps = 1;
2330                 if (radeon_crtc->rmx_type != RMX_OFF)
2331                         wm_high.vtaps = 2;
2332                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2333                 wm_high.lb_size = lb_size;
2334                 wm_high.dram_channels = dram_channels;
2335                 wm_high.num_heads = num_heads;
2336
2337                 /* watermark for low clocks */
2338                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2339                         wm_low.yclk =
2340                                 radeon_dpm_get_mclk(rdev, true) * 10;
2341                         wm_low.sclk =
2342                                 radeon_dpm_get_sclk(rdev, true) * 10;
2343                 } else {
2344                         wm_low.yclk = rdev->pm.current_mclk * 10;
2345                         wm_low.sclk = rdev->pm.current_sclk * 10;
2346                 }
2347
2348                 wm_low.disp_clk = mode->clock;
2349                 wm_low.src_width = mode->crtc_hdisplay;
2350                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
2351                 wm_low.blank_time = line_time - wm_low.active_time;
2352                 wm_low.interlaced = false;
2353                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2354                         wm_low.interlaced = true;
2355                 wm_low.vsc = radeon_crtc->vsc;
2356                 wm_low.vtaps = 1;
2357                 if (radeon_crtc->rmx_type != RMX_OFF)
2358                         wm_low.vtaps = 2;
2359                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2360                 wm_low.lb_size = lb_size;
2361                 wm_low.dram_channels = dram_channels;
2362                 wm_low.num_heads = num_heads;
2363
2364                 /* set for high clocks */
2365                 latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2366                 /* set for low clocks */
2367                 latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2368
2369                 /* possibly force display priority to high */
2370                 /* should really do this at mode validation time... */
2371                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2372                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2373                     !dce6_check_latency_hiding(&wm_high) ||
2374                     (rdev->disp_priority == 2)) {
2375                         DRM_DEBUG_KMS("force priority to high\n");
2376                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2377                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2378                 }
2379                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2380                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2381                     !dce6_check_latency_hiding(&wm_low) ||
2382                     (rdev->disp_priority == 2)) {
2383                         DRM_DEBUG_KMS("force priority to high\n");
2384                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2385                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2386                 }
2387
2388                 a.full = dfixed_const(1000);
2389                 b.full = dfixed_const(mode->clock);
2390                 b.full = dfixed_div(b, a);
2391                 c.full = dfixed_const(latency_watermark_a);
2392                 c.full = dfixed_mul(c, b);
2393                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2394                 c.full = dfixed_div(c, a);
2395                 a.full = dfixed_const(16);
2396                 c.full = dfixed_div(c, a);
2397                 priority_a_mark = dfixed_trunc(c);
2398                 priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2399
2400                 a.full = dfixed_const(1000);
2401                 b.full = dfixed_const(mode->clock);
2402                 b.full = dfixed_div(b, a);
2403                 c.full = dfixed_const(latency_watermark_b);
2404                 c.full = dfixed_mul(c, b);
2405                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2406                 c.full = dfixed_div(c, a);
2407                 a.full = dfixed_const(16);
2408                 c.full = dfixed_div(c, a);
2409                 priority_b_mark = dfixed_trunc(c);
2410                 priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2411
2412                 /* Save number of lines the linebuffer leads before the scanout */
2413                 radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
2414         }
2415
2416         /* select wm A */
2417         arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2418         tmp = arb_control3;
2419         tmp &= ~LATENCY_WATERMARK_MASK(3);
2420         tmp |= LATENCY_WATERMARK_MASK(1);
2421         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2422         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2423                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2424                 LATENCY_HIGH_WATERMARK(line_time)));
2425         /* select wm B */
2426         tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2427         tmp &= ~LATENCY_WATERMARK_MASK(3);
2428         tmp |= LATENCY_WATERMARK_MASK(2);
2429         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2430         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2431                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2432                 LATENCY_HIGH_WATERMARK(line_time)));
2433         /* restore original selection */
2434         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2435
2436         /* write the priority marks */
2437         WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2438         WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2439
2440         /* save values for DPM */
2441         radeon_crtc->line_time = line_time;
2442         radeon_crtc->wm_high = latency_watermark_a;
2443         radeon_crtc->wm_low = latency_watermark_b;
2444 }
2445
2446 void dce6_bandwidth_update(struct radeon_device *rdev)
2447 {
2448         struct drm_display_mode *mode0 = NULL;
2449         struct drm_display_mode *mode1 = NULL;
2450         u32 num_heads = 0, lb_size;
2451         int i;
2452
2453         if (!rdev->mode_info.mode_config_initialized)
2454                 return;
2455
2456         radeon_update_display_priority(rdev);
2457
2458         for (i = 0; i < rdev->num_crtc; i++) {
2459                 if (rdev->mode_info.crtcs[i]->base.enabled)
2460                         num_heads++;
2461         }
2462         for (i = 0; i < rdev->num_crtc; i += 2) {
2463                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2464                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2465                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2466                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2467                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2468                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2469         }
2470 }
2471
2472 /*
2473  * Core functions
2474  */
2475 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2476 {
2477         u32 *tile = rdev->config.si.tile_mode_array;
2478         const u32 num_tile_mode_states =
2479                         ARRAY_SIZE(rdev->config.si.tile_mode_array);
2480         u32 reg_offset, split_equal_to_row_size;
2481
2482         switch (rdev->config.si.mem_row_size_in_kb) {
2483         case 1:
2484                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2485                 break;
2486         case 2:
2487         default:
2488                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2489                 break;
2490         case 4:
2491                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2492                 break;
2493         }
2494
2495         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2496                 tile[reg_offset] = 0;
2497
2498         switch(rdev->family) {
2499         case CHIP_TAHITI:
2500         case CHIP_PITCAIRN:
2501                 /* non-AA compressed depth or any compressed stencil */
2502                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2503                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2504                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2505                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2506                            NUM_BANKS(ADDR_SURF_16_BANK) |
2507                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2508                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2509                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2510                 /* 2xAA/4xAA compressed depth only */
2511                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2512                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2513                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2514                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2515                            NUM_BANKS(ADDR_SURF_16_BANK) |
2516                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2518                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2519                 /* 8xAA compressed depth only */
2520                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2521                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2522                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2523                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2524                            NUM_BANKS(ADDR_SURF_16_BANK) |
2525                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2526                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2527                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2528                 /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2529                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2530                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2531                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2532                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2533                            NUM_BANKS(ADDR_SURF_16_BANK) |
2534                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2535                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2536                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2537                 /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2538                 tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2539                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2540                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2541                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2542                            NUM_BANKS(ADDR_SURF_16_BANK) |
2543                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2544                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2545                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2546                 /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2547                 tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2548                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2549                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2550                            TILE_SPLIT(split_equal_to_row_size) |
2551                            NUM_BANKS(ADDR_SURF_16_BANK) |
2552                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2553                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2554                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2555                 /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2556                 tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2557                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2558                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2559                            TILE_SPLIT(split_equal_to_row_size) |
2560                            NUM_BANKS(ADDR_SURF_16_BANK) |
2561                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2562                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2563                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2564                 /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2565                 tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2566                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2567                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2568                            TILE_SPLIT(split_equal_to_row_size) |
2569                            NUM_BANKS(ADDR_SURF_16_BANK) |
2570                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2571                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2572                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2573                 /* 1D and 1D Array Surfaces */
2574                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2575                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2576                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2577                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2578                            NUM_BANKS(ADDR_SURF_16_BANK) |
2579                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2580                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2581                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2582                 /* Displayable maps. */
2583                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2584                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2585                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2586                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2587                            NUM_BANKS(ADDR_SURF_16_BANK) |
2588                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2589                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2590                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2591                 /* Display 8bpp. */
2592                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2593                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2594                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2595                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2596                            NUM_BANKS(ADDR_SURF_16_BANK) |
2597                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2598                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2599                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2600                 /* Display 16bpp. */
2601                 tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2602                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2603                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2604                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2605                            NUM_BANKS(ADDR_SURF_16_BANK) |
2606                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2607                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2608                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2609                 /* Display 32bpp. */
2610                 tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2611                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2612                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2613                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2614                            NUM_BANKS(ADDR_SURF_16_BANK) |
2615                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2617                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2618                 /* Thin. */
2619                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2620                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2621                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2622                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2623                            NUM_BANKS(ADDR_SURF_16_BANK) |
2624                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2626                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2627                 /* Thin 8 bpp. */
2628                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2629                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2630                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2631                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2632                            NUM_BANKS(ADDR_SURF_16_BANK) |
2633                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2634                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2635                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2636                 /* Thin 16 bpp. */
2637                 tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2638                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2639                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2640                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2641                            NUM_BANKS(ADDR_SURF_16_BANK) |
2642                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2644                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2645                 /* Thin 32 bpp. */
2646                 tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2647                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2648                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2649                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2650                            NUM_BANKS(ADDR_SURF_16_BANK) |
2651                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2653                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2654                 /* Thin 64 bpp. */
2655                 tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2656                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2657                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2658                            TILE_SPLIT(split_equal_to_row_size) |
2659                            NUM_BANKS(ADDR_SURF_16_BANK) |
2660                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2661                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2662                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2663                 /* 8 bpp PRT. */
2664                 tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2665                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2666                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2667                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2668                            NUM_BANKS(ADDR_SURF_16_BANK) |
2669                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2670                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2671                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2672                 /* 16 bpp PRT */
2673                 tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2675                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2676                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2677                            NUM_BANKS(ADDR_SURF_16_BANK) |
2678                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2679                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2680                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2681                 /* 32 bpp PRT */
2682                 tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2683                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2684                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2685                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2686                            NUM_BANKS(ADDR_SURF_16_BANK) |
2687                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2688                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2689                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2690                 /* 64 bpp PRT */
2691                 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2692                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2693                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2694                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2695                            NUM_BANKS(ADDR_SURF_16_BANK) |
2696                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2698                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2699                 /* 128 bpp PRT */
2700                 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2701                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2702                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2703                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2704                            NUM_BANKS(ADDR_SURF_8_BANK) |
2705                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2706                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2707                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2708
2709                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2710                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2711                 break;
2712
2713         case CHIP_VERDE:
2714         case CHIP_OLAND:
2715         case CHIP_HAINAN:
2716                 /* non-AA compressed depth or any compressed stencil */
2717                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2718                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2719                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2720                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2721                            NUM_BANKS(ADDR_SURF_16_BANK) |
2722                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2723                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2724                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2725                 /* 2xAA/4xAA compressed depth only */
2726                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2727                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2728                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2729                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2730                            NUM_BANKS(ADDR_SURF_16_BANK) |
2731                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2732                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2733                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2734                 /* 8xAA compressed depth only */
2735                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2736                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2737                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2738                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2739                            NUM_BANKS(ADDR_SURF_16_BANK) |
2740                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2741                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2742                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2743                 /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2744                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2745                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2746                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2747                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2748                            NUM_BANKS(ADDR_SURF_16_BANK) |
2749                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2750                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2751                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2752                 /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2753                 tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2754                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2755                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2756                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2757                            NUM_BANKS(ADDR_SURF_16_BANK) |
2758                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2759                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2760                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2761                 /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2762                 tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2763                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2764                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2765                            TILE_SPLIT(split_equal_to_row_size) |
2766                            NUM_BANKS(ADDR_SURF_16_BANK) |
2767                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2769                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2770                 /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2771                 tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2772                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2773                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2774                            TILE_SPLIT(split_equal_to_row_size) |
2775                            NUM_BANKS(ADDR_SURF_16_BANK) |
2776                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2777                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2778                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2779                 /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2780                 tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2781                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2782                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2783                            TILE_SPLIT(split_equal_to_row_size) |
2784                            NUM_BANKS(ADDR_SURF_16_BANK) |
2785                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2786                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2787                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2788                 /* 1D and 1D Array Surfaces */
2789                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2790                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2791                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2792                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2793                            NUM_BANKS(ADDR_SURF_16_BANK) |
2794                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2795                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2796                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2797                 /* Displayable maps. */
2798                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2799                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2800                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2801                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2802                            NUM_BANKS(ADDR_SURF_16_BANK) |
2803                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2804                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2805                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2806                 /* Display 8bpp. */
2807                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2808                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2809                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2811                            NUM_BANKS(ADDR_SURF_16_BANK) |
2812                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2813                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2814                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2815                 /* Display 16bpp. */
2816                 tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2817                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2818                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2819                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2820                            NUM_BANKS(ADDR_SURF_16_BANK) |
2821                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2823                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2824                 /* Display 32bpp. */
2825                 tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2826                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2827                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2828                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2829                            NUM_BANKS(ADDR_SURF_16_BANK) |
2830                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2831                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2832                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2833                 /* Thin. */
2834                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2835                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2836                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2837                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2838                            NUM_BANKS(ADDR_SURF_16_BANK) |
2839                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2840                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2841                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2842                 /* Thin 8 bpp. */
2843                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2844                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2845                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2846                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2847                            NUM_BANKS(ADDR_SURF_16_BANK) |
2848                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2850                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2851                 /* Thin 16 bpp. */
2852                 tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2853                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2854                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2855                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2856                            NUM_BANKS(ADDR_SURF_16_BANK) |
2857                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2859                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2860                 /* Thin 32 bpp. */
2861                 tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2862                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2863                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2864                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2865                            NUM_BANKS(ADDR_SURF_16_BANK) |
2866                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2867                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2868                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2869                 /* Thin 64 bpp. */
2870                 tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2871                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2872                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2873                            TILE_SPLIT(split_equal_to_row_size) |
2874                            NUM_BANKS(ADDR_SURF_16_BANK) |
2875                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2876                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2877                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2878                 /* 8 bpp PRT. */
2879                 tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2880                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2881                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2882                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2883                            NUM_BANKS(ADDR_SURF_16_BANK) |
2884                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2885                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2886                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2887                 /* 16 bpp PRT */
2888                 tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2889                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2890                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2891                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2892                            NUM_BANKS(ADDR_SURF_16_BANK) |
2893                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2894                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2895                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2896                 /* 32 bpp PRT */
2897                 tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2898                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2899                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2900                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2901                            NUM_BANKS(ADDR_SURF_16_BANK) |
2902                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2903                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2904                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2905                 /* 64 bpp PRT */
2906                 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2907                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2908                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2909                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2910                            NUM_BANKS(ADDR_SURF_16_BANK) |
2911                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2912                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2913                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2914                 /* 128 bpp PRT */
2915                 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2916                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2917                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2918                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2919                            NUM_BANKS(ADDR_SURF_8_BANK) |
2920                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2921                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2922                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2923
2924                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2925                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2926                 break;
2927
2928         default:
2929                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2930         }
2931 }
2932
2933 static void si_select_se_sh(struct radeon_device *rdev,
2934                             u32 se_num, u32 sh_num)
2935 {
2936         u32 data = INSTANCE_BROADCAST_WRITES;
2937
2938         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2939                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2940         else if (se_num == 0xffffffff)
2941                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2942         else if (sh_num == 0xffffffff)
2943                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2944         else
2945                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2946         WREG32(GRBM_GFX_INDEX, data);
2947 }
2948
/**
 * si_create_bitmask - build a mask with the low @bit_width bits set
 * @bit_width: number of low-order bits to set
 *
 * Returns 0 for a width of 0 and 0xffffffff for any width of 32 or more.
 */
static u32 si_create_bitmask(u32 bit_width)
{
	/* a 32-bit value cannot be shifted by 32 or more, so saturate first */
	if (bit_width >= 32)
		return 0xffffffff;

	return ((u32)1 << bit_width) - 1;
}
2959
2960 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2961 {
2962         u32 data, mask;
2963
2964         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2965         if (data & 1)
2966                 data &= INACTIVE_CUS_MASK;
2967         else
2968                 data = 0;
2969         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2970
2971         data >>= INACTIVE_CUS_SHIFT;
2972
2973         mask = si_create_bitmask(cu_per_sh);
2974
2975         return ~data & mask;
2976 }
2977
2978 static void si_setup_spi(struct radeon_device *rdev,
2979                          u32 se_num, u32 sh_per_se,
2980                          u32 cu_per_sh)
2981 {
2982         int i, j, k;
2983         u32 data, mask, active_cu;
2984
2985         for (i = 0; i < se_num; i++) {
2986                 for (j = 0; j < sh_per_se; j++) {
2987                         si_select_se_sh(rdev, i, j);
2988                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2989                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2990
2991                         mask = 1;
2992                         for (k = 0; k < 16; k++) {
2993                                 mask <<= k;
2994                                 if (active_cu & mask) {
2995                                         data &= ~mask;
2996                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2997                                         break;
2998                                 }
2999                         }
3000                 }
3001         }
3002         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3003 }
3004
3005 static u32 si_get_rb_disabled(struct radeon_device *rdev,
3006                               u32 max_rb_num_per_se,
3007                               u32 sh_per_se)
3008 {
3009         u32 data, mask;
3010
3011         data = RREG32(CC_RB_BACKEND_DISABLE);
3012         if (data & 1)
3013                 data &= BACKEND_DISABLE_MASK;
3014         else
3015                 data = 0;
3016         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3017
3018         data >>= BACKEND_DISABLE_SHIFT;
3019
3020         mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3021
3022         return data & mask;
3023 }
3024
3025 static void si_setup_rb(struct radeon_device *rdev,
3026                         u32 se_num, u32 sh_per_se,
3027                         u32 max_rb_num_per_se)
3028 {
3029         int i, j;
3030         u32 data, mask;
3031         u32 disabled_rbs = 0;
3032         u32 enabled_rbs = 0;
3033
3034         for (i = 0; i < se_num; i++) {
3035                 for (j = 0; j < sh_per_se; j++) {
3036                         si_select_se_sh(rdev, i, j);
3037                         data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3038                         disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
3039                 }
3040         }
3041         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3042
3043         mask = 1;
3044         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3045                 if (!(disabled_rbs & mask))
3046                         enabled_rbs |= mask;
3047                 mask <<= 1;
3048         }
3049
3050         rdev->config.si.backend_enable_mask = enabled_rbs;
3051
3052         for (i = 0; i < se_num; i++) {
3053                 si_select_se_sh(rdev, i, 0xffffffff);
3054                 data = 0;
3055                 for (j = 0; j < sh_per_se; j++) {
3056                         switch (enabled_rbs & 3) {
3057                         case 1:
3058                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3059                                 break;
3060                         case 2:
3061                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3062                                 break;
3063                         case 3:
3064                         default:
3065                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3066                                 break;
3067                         }
3068                         enabled_rbs >>= 2;
3069                 }
3070                 WREG32(PA_SC_RASTER_CONFIG, data);
3071         }
3072         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3073 }
3074
3075 static void si_gpu_init(struct radeon_device *rdev)
3076 {
3077         u32 gb_addr_config = 0;
3078         u32 mc_shared_chmap, mc_arb_ramcfg;
3079         u32 sx_debug_1;
3080         u32 hdp_host_path_cntl;
3081         u32 tmp;
3082         int i, j;
3083
3084         switch (rdev->family) {
3085         case CHIP_TAHITI:
3086                 rdev->config.si.max_shader_engines = 2;
3087                 rdev->config.si.max_tile_pipes = 12;
3088                 rdev->config.si.max_cu_per_sh = 8;
3089                 rdev->config.si.max_sh_per_se = 2;
3090                 rdev->config.si.max_backends_per_se = 4;
3091                 rdev->config.si.max_texture_channel_caches = 12;
3092                 rdev->config.si.max_gprs = 256;
3093                 rdev->config.si.max_gs_threads = 32;
3094                 rdev->config.si.max_hw_contexts = 8;
3095
3096                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3097                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3098                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3099                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3100                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3101                 break;
3102         case CHIP_PITCAIRN:
3103                 rdev->config.si.max_shader_engines = 2;
3104                 rdev->config.si.max_tile_pipes = 8;
3105                 rdev->config.si.max_cu_per_sh = 5;
3106                 rdev->config.si.max_sh_per_se = 2;
3107                 rdev->config.si.max_backends_per_se = 4;
3108                 rdev->config.si.max_texture_channel_caches = 8;
3109                 rdev->config.si.max_gprs = 256;
3110                 rdev->config.si.max_gs_threads = 32;
3111                 rdev->config.si.max_hw_contexts = 8;
3112
3113                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3114                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3115                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3116                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3117                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3118                 break;
3119         case CHIP_VERDE:
3120         default:
3121                 rdev->config.si.max_shader_engines = 1;
3122                 rdev->config.si.max_tile_pipes = 4;
3123                 rdev->config.si.max_cu_per_sh = 5;
3124                 rdev->config.si.max_sh_per_se = 2;
3125                 rdev->config.si.max_backends_per_se = 4;
3126                 rdev->config.si.max_texture_channel_caches = 4;
3127                 rdev->config.si.max_gprs = 256;
3128                 rdev->config.si.max_gs_threads = 32;
3129                 rdev->config.si.max_hw_contexts = 8;
3130
3131                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3132                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3133                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3134                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3135                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3136                 break;
3137         case CHIP_OLAND:
3138                 rdev->config.si.max_shader_engines = 1;
3139                 rdev->config.si.max_tile_pipes = 4;
3140                 rdev->config.si.max_cu_per_sh = 6;
3141                 rdev->config.si.max_sh_per_se = 1;
3142                 rdev->config.si.max_backends_per_se = 2;
3143                 rdev->config.si.max_texture_channel_caches = 4;
3144                 rdev->config.si.max_gprs = 256;
3145                 rdev->config.si.max_gs_threads = 16;
3146                 rdev->config.si.max_hw_contexts = 8;
3147
3148                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3149                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3150                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3151                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3152                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3153                 break;
3154         case CHIP_HAINAN:
3155                 rdev->config.si.max_shader_engines = 1;
3156                 rdev->config.si.max_tile_pipes = 4;
3157                 rdev->config.si.max_cu_per_sh = 5;
3158                 rdev->config.si.max_sh_per_se = 1;
3159                 rdev->config.si.max_backends_per_se = 1;
3160                 rdev->config.si.max_texture_channel_caches = 2;
3161                 rdev->config.si.max_gprs = 256;
3162                 rdev->config.si.max_gs_threads = 16;
3163                 rdev->config.si.max_hw_contexts = 8;
3164
3165                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3166                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3167                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3168                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3169                 gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
3170                 break;
3171         }
3172
3173         /* Initialize HDP */
3174         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3175                 WREG32((0x2c14 + j), 0x00000000);
3176                 WREG32((0x2c18 + j), 0x00000000);
3177                 WREG32((0x2c1c + j), 0x00000000);
3178                 WREG32((0x2c20 + j), 0x00000000);
3179                 WREG32((0x2c24 + j), 0x00000000);
3180         }
3181
3182         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3183         WREG32(SRBM_INT_CNTL, 1);
3184         WREG32(SRBM_INT_ACK, 1);
3185
3186         evergreen_fix_pci_max_read_req_size(rdev);
3187
3188         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3189
3190         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3191         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3192
3193         rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3194         rdev->config.si.mem_max_burst_length_bytes = 256;
3195         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3196         rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3197         if (rdev->config.si.mem_row_size_in_kb > 4)
3198                 rdev->config.si.mem_row_size_in_kb = 4;
3199         /* XXX use MC settings? */
3200         rdev->config.si.shader_engine_tile_size = 32;
3201         rdev->config.si.num_gpus = 1;
3202         rdev->config.si.multi_gpu_tile_size = 64;
3203
3204         /* fix up row size */
3205         gb_addr_config &= ~ROW_SIZE_MASK;
3206         switch (rdev->config.si.mem_row_size_in_kb) {
3207         case 1:
3208         default:
3209                 gb_addr_config |= ROW_SIZE(0);
3210                 break;
3211         case 2:
3212                 gb_addr_config |= ROW_SIZE(1);
3213                 break;
3214         case 4:
3215                 gb_addr_config |= ROW_SIZE(2);
3216                 break;
3217         }
3218
3219         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3220          * not have bank info, so create a custom tiling dword.
3221          * bits 3:0   num_pipes
3222          * bits 7:4   num_banks
3223          * bits 11:8  group_size
3224          * bits 15:12 row_size
3225          */
3226         rdev->config.si.tile_config = 0;
3227         switch (rdev->config.si.num_tile_pipes) {
3228         case 1:
3229                 rdev->config.si.tile_config |= (0 << 0);
3230                 break;
3231         case 2:
3232                 rdev->config.si.tile_config |= (1 << 0);
3233                 break;
3234         case 4:
3235                 rdev->config.si.tile_config |= (2 << 0);
3236                 break;
3237         case 8:
3238         default:
3239                 /* XXX what about 12? */
3240                 rdev->config.si.tile_config |= (3 << 0);
3241                 break;
3242         }       
3243         switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3244         case 0: /* four banks */
3245                 rdev->config.si.tile_config |= 0 << 4;
3246                 break;
3247         case 1: /* eight banks */
3248                 rdev->config.si.tile_config |= 1 << 4;
3249                 break;
3250         case 2: /* sixteen banks */
3251         default:
3252                 rdev->config.si.tile_config |= 2 << 4;
3253                 break;
3254         }
3255         rdev->config.si.tile_config |=
3256                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3257         rdev->config.si.tile_config |=
3258                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3259
3260         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3261         WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3262         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3263         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3264         WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3265         WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3266         if (rdev->has_uvd) {
3267                 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3268                 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3269                 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3270         }
3271
3272         si_tiling_mode_table_init(rdev);
3273
3274         si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3275                     rdev->config.si.max_sh_per_se,
3276                     rdev->config.si.max_backends_per_se);
3277
3278         si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3279                      rdev->config.si.max_sh_per_se,
3280                      rdev->config.si.max_cu_per_sh);
3281
3282         rdev->config.si.active_cus = 0;
3283         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
3284                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
3285                         rdev->config.si.active_cus +=
3286                                 hweight32(si_get_cu_active_bitmap(rdev, i, j));
3287                 }
3288         }
3289
3290         /* set HW defaults for 3D engine */
3291         WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3292                                      ROQ_IB2_START(0x2b)));
3293         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3294
3295         sx_debug_1 = RREG32(SX_DEBUG_1);
3296         WREG32(SX_DEBUG_1, sx_debug_1);
3297
3298         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3299
3300         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3301                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3302                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3303                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3304
3305         WREG32(VGT_NUM_INSTANCES, 1);
3306
3307         WREG32(CP_PERFMON_CNTL, 0);
3308
3309         WREG32(SQ_CONFIG, 0);
3310
3311         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3312                                           FORCE_EOV_MAX_REZ_CNT(255)));
3313
3314         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3315                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3316
3317         WREG32(VGT_GS_VERTEX_REUSE, 16);
3318         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3319
3320         WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3321         WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3322         WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3323         WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3324         WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3325         WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3326         WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3327         WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3328
3329         tmp = RREG32(HDP_MISC_CNTL);
3330         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3331         WREG32(HDP_MISC_CNTL, tmp);
3332
3333         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3334         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3335
3336         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3337
3338         udelay(50);
3339 }
3340
/*
 * GPU scratch register helper functions.
 */
3344 static void si_scratch_init(struct radeon_device *rdev)
3345 {
3346         int i;
3347
3348         rdev->scratch.num_reg = 7;
3349         rdev->scratch.reg_base = SCRATCH_REG0;
3350         for (i = 0; i < rdev->scratch.num_reg; i++) {
3351                 rdev->scratch.free[i] = true;
3352                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3353         }
3354 }
3355
3356 void si_fence_ring_emit(struct radeon_device *rdev,
3357                         struct radeon_fence *fence)
3358 {
3359         struct radeon_ring *ring = &rdev->ring[fence->ring];
3360         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3361
3362         /* flush read cache over gart */
3363         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3364         radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3365         radeon_ring_write(ring, 0);
3366         radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3367         radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3368                           PACKET3_TC_ACTION_ENA |
3369                           PACKET3_SH_KCACHE_ACTION_ENA |
3370                           PACKET3_SH_ICACHE_ACTION_ENA);
3371         radeon_ring_write(ring, 0xFFFFFFFF);
3372         radeon_ring_write(ring, 0);
3373         radeon_ring_write(ring, 10); /* poll interval */
3374         /* EVENT_WRITE_EOP - flush caches, send int */
3375         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3376         radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3377         radeon_ring_write(ring, lower_32_bits(addr));
3378         radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3379         radeon_ring_write(ring, fence->seq);
3380         radeon_ring_write(ring, 0);
3381 }
3382
3383 /*
3384  * IB stuff
3385  */
3386 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3387 {
3388         struct radeon_ring *ring = &rdev->ring[ib->ring];
3389         unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3390         u32 header;
3391
3392         if (ib->is_const_ib) {
3393                 /* set switch buffer packet before const IB */
3394                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3395                 radeon_ring_write(ring, 0);
3396
3397                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3398         } else {
3399                 u32 next_rptr;
3400                 if (ring->rptr_save_reg) {
3401                         next_rptr = ring->wptr + 3 + 4 + 8;
3402                         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3403                         radeon_ring_write(ring, ((ring->rptr_save_reg -
3404                                                   PACKET3_SET_CONFIG_REG_START) >> 2));
3405                         radeon_ring_write(ring, next_rptr);
3406                 } else if (rdev->wb.enabled) {
3407                         next_rptr = ring->wptr + 5 + 4 + 8;
3408                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3409                         radeon_ring_write(ring, (1 << 8));
3410                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3411                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3412                         radeon_ring_write(ring, next_rptr);
3413                 }
3414
3415                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3416         }
3417
3418         radeon_ring_write(ring, header);
3419         radeon_ring_write(ring,
3420 #ifdef __BIG_ENDIAN
3421                           (2 << 0) |
3422 #endif
3423                           (ib->gpu_addr & 0xFFFFFFFC));
3424         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3425         radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
3426
3427         if (!ib->is_const_ib) {
3428                 /* flush read cache over gart for this vmid */
3429                 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3430                 radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3431                 radeon_ring_write(ring, vm_id);
3432                 radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3433                 radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3434                                   PACKET3_TC_ACTION_ENA |
3435                                   PACKET3_SH_KCACHE_ACTION_ENA |
3436                                   PACKET3_SH_ICACHE_ACTION_ENA);
3437                 radeon_ring_write(ring, 0xFFFFFFFF);
3438                 radeon_ring_write(ring, 0);
3439                 radeon_ring_write(ring, 10); /* poll interval */
3440         }
3441 }
3442
3443 /*
3444  * CP.
3445  */
3446 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3447 {
3448         if (enable)
3449                 WREG32(CP_ME_CNTL, 0);
3450         else {
3451                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3452                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3453                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3454                 WREG32(SCRATCH_UMSK, 0);
3455                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3456                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3457                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3458         }
3459         udelay(50);
3460 }
3461
3462 static int si_cp_load_microcode(struct radeon_device *rdev)
3463 {
3464         int i;
3465
3466         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3467                 return -EINVAL;
3468
3469         si_cp_enable(rdev, false);
3470
3471         if (rdev->new_fw) {
3472                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3473                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3474                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3475                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3476                 const struct gfx_firmware_header_v1_0 *me_hdr =
3477                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3478                 const __le32 *fw_data;
3479                 u32 fw_size;
3480
3481                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3482                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3483                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3484
3485                 /* PFP */
3486                 fw_data = (const __le32 *)
3487                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3488                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3489                 WREG32(CP_PFP_UCODE_ADDR, 0);
3490                 for (i = 0; i < fw_size; i++)
3491                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3492                 WREG32(CP_PFP_UCODE_ADDR, 0);
3493
3494                 /* CE */
3495                 fw_data = (const __le32 *)
3496                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3497                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3498                 WREG32(CP_CE_UCODE_ADDR, 0);
3499                 for (i = 0; i < fw_size; i++)
3500                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3501                 WREG32(CP_CE_UCODE_ADDR, 0);
3502
3503                 /* ME */
3504                 fw_data = (const __be32 *)
3505                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3506                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3507                 WREG32(CP_ME_RAM_WADDR, 0);
3508                 for (i = 0; i < fw_size; i++)
3509                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3510                 WREG32(CP_ME_RAM_WADDR, 0);
3511         } else {
3512                 const __be32 *fw_data;
3513
3514                 /* PFP */
3515                 fw_data = (const __be32 *)rdev->pfp_fw->data;
3516                 WREG32(CP_PFP_UCODE_ADDR, 0);
3517                 for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3518                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3519                 WREG32(CP_PFP_UCODE_ADDR, 0);
3520
3521                 /* CE */
3522                 fw_data = (const __be32 *)rdev->ce_fw->data;
3523                 WREG32(CP_CE_UCODE_ADDR, 0);
3524                 for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3525                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3526                 WREG32(CP_CE_UCODE_ADDR, 0);
3527
3528                 /* ME */
3529                 fw_data = (const __be32 *)rdev->me_fw->data;
3530                 WREG32(CP_ME_RAM_WADDR, 0);
3531                 for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3532                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3533                 WREG32(CP_ME_RAM_WADDR, 0);
3534         }
3535
3536         WREG32(CP_PFP_UCODE_ADDR, 0);
3537         WREG32(CP_CE_UCODE_ADDR, 0);
3538         WREG32(CP_ME_RAM_WADDR, 0);
3539         WREG32(CP_ME_RAM_RADDR, 0);
3540         return 0;
3541 }
3542
3543 static int si_cp_start(struct radeon_device *rdev)
3544 {
3545         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3546         int r, i;
3547
3548         r = radeon_ring_lock(rdev, ring, 7 + 4);
3549         if (r) {
3550                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3551                 return r;
3552         }
3553         /* init the CP */
3554         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3555         radeon_ring_write(ring, 0x1);
3556         radeon_ring_write(ring, 0x0);
3557         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3558         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3559         radeon_ring_write(ring, 0);
3560         radeon_ring_write(ring, 0);
3561
3562         /* init the CE partitions */
3563         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3564         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3565         radeon_ring_write(ring, 0xc000);
3566         radeon_ring_write(ring, 0xe000);
3567         radeon_ring_unlock_commit(rdev, ring, false);
3568
3569         si_cp_enable(rdev, true);
3570
3571         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3572         if (r) {
3573                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3574                 return r;
3575         }
3576
3577         /* setup clear context state */
3578         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3579         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3580
3581         for (i = 0; i < si_default_size; i++)
3582                 radeon_ring_write(ring, si_default_state[i]);
3583
3584         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3585         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3586
3587         /* set clear context state */
3588         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3589         radeon_ring_write(ring, 0);
3590
3591         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3592         radeon_ring_write(ring, 0x00000316);
3593         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3594         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3595
3596         radeon_ring_unlock_commit(rdev, ring, false);
3597
3598         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3599                 ring = &rdev->ring[i];
3600                 r = radeon_ring_lock(rdev, ring, 2);
3601
3602                 /* clear the compute context state */
3603                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3604                 radeon_ring_write(ring, 0);
3605
3606                 radeon_ring_unlock_commit(rdev, ring, false);
3607         }
3608
3609         return 0;
3610 }
3611
3612 static void si_cp_fini(struct radeon_device *rdev)
3613 {
3614         struct radeon_ring *ring;
3615         si_cp_enable(rdev, false);
3616
3617         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3618         radeon_ring_fini(rdev, ring);
3619         radeon_scratch_free(rdev, ring->rptr_save_reg);
3620
3621         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3622         radeon_ring_fini(rdev, ring);
3623         radeon_scratch_free(rdev, ring->rptr_save_reg);
3624
3625         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3626         radeon_ring_fini(rdev, ring);
3627         radeon_scratch_free(rdev, ring->rptr_save_reg);
3628 }
3629
3630 static int si_cp_resume(struct radeon_device *rdev)
3631 {
3632         struct radeon_ring *ring;
3633         u32 tmp;
3634         u32 rb_bufsz;
3635         int r;
3636
3637         si_enable_gui_idle_interrupt(rdev, false);
3638
3639         WREG32(CP_SEM_WAIT_TIMER, 0x0);
3640         WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3641
3642         /* Set the write pointer delay */
3643         WREG32(CP_RB_WPTR_DELAY, 0);
3644
3645         WREG32(CP_DEBUG, 0);
3646         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3647
3648         /* ring 0 - compute and gfx */
3649         /* Set ring buffer size */
3650         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3651         rb_bufsz = order_base_2(ring->ring_size / 8);
3652         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3653 #ifdef __BIG_ENDIAN
3654         tmp |= BUF_SWAP_32BIT;
3655 #endif
3656         WREG32(CP_RB0_CNTL, tmp);
3657
3658         /* Initialize the ring buffer's read and write pointers */
3659         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3660         ring->wptr = 0;
3661         WREG32(CP_RB0_WPTR, ring->wptr);
3662
3663         /* set the wb address whether it's enabled or not */
3664         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3665         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3666
3667         if (rdev->wb.enabled)
3668                 WREG32(SCRATCH_UMSK, 0xff);
3669         else {
3670                 tmp |= RB_NO_UPDATE;
3671                 WREG32(SCRATCH_UMSK, 0);
3672         }
3673
3674         mdelay(1);
3675         WREG32(CP_RB0_CNTL, tmp);
3676
3677         WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3678
3679         /* ring1  - compute only */
3680         /* Set ring buffer size */
3681         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3682         rb_bufsz = order_base_2(ring->ring_size / 8);
3683         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3684 #ifdef __BIG_ENDIAN
3685         tmp |= BUF_SWAP_32BIT;
3686 #endif
3687         WREG32(CP_RB1_CNTL, tmp);
3688
3689         /* Initialize the ring buffer's read and write pointers */
3690         WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3691         ring->wptr = 0;
3692         WREG32(CP_RB1_WPTR, ring->wptr);
3693
3694         /* set the wb address whether it's enabled or not */
3695         WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3696         WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3697
3698         mdelay(1);
3699         WREG32(CP_RB1_CNTL, tmp);
3700
3701         WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3702
3703         /* ring2 - compute only */
3704         /* Set ring buffer size */
3705         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3706         rb_bufsz = order_base_2(ring->ring_size / 8);
3707         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3708 #ifdef __BIG_ENDIAN
3709         tmp |= BUF_SWAP_32BIT;
3710 #endif
3711         WREG32(CP_RB2_CNTL, tmp);
3712
3713         /* Initialize the ring buffer's read and write pointers */
3714         WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3715         ring->wptr = 0;
3716         WREG32(CP_RB2_WPTR, ring->wptr);
3717
3718         /* set the wb address whether it's enabled or not */
3719         WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3720         WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3721
3722         mdelay(1);
3723         WREG32(CP_RB2_CNTL, tmp);
3724
3725         WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3726
3727         /* start the rings */
3728         si_cp_start(rdev);
3729         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3730         rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3731         rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3732         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3733         if (r) {
3734                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3735                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3736                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3737                 return r;
3738         }
3739         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3740         if (r) {
3741                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3742         }
3743         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3744         if (r) {
3745                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3746         }
3747
3748         si_enable_gui_idle_interrupt(rdev, true);
3749
3750         if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3751                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3752
3753         return 0;
3754 }
3755
3756 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3757 {
3758         u32 reset_mask = 0;
3759         u32 tmp;
3760
3761         /* GRBM_STATUS */
3762         tmp = RREG32(GRBM_STATUS);
3763         if (tmp & (PA_BUSY | SC_BUSY |
3764                    BCI_BUSY | SX_BUSY |
3765                    TA_BUSY | VGT_BUSY |
3766                    DB_BUSY | CB_BUSY |
3767                    GDS_BUSY | SPI_BUSY |
3768                    IA_BUSY | IA_BUSY_NO_DMA))
3769                 reset_mask |= RADEON_RESET_GFX;
3770
3771         if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3772                    CP_BUSY | CP_COHERENCY_BUSY))
3773                 reset_mask |= RADEON_RESET_CP;
3774
3775         if (tmp & GRBM_EE_BUSY)
3776                 reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3777
3778         /* GRBM_STATUS2 */
3779         tmp = RREG32(GRBM_STATUS2);
3780         if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3781                 reset_mask |= RADEON_RESET_RLC;
3782
3783         /* DMA_STATUS_REG 0 */
3784         tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3785         if (!(tmp & DMA_IDLE))
3786                 reset_mask |= RADEON_RESET_DMA;
3787
3788         /* DMA_STATUS_REG 1 */
3789         tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3790         if (!(tmp & DMA_IDLE))
3791                 reset_mask |= RADEON_RESET_DMA1;
3792
3793         /* SRBM_STATUS2 */
3794         tmp = RREG32(SRBM_STATUS2);
3795         if (tmp & DMA_BUSY)
3796                 reset_mask |= RADEON_RESET_DMA;
3797
3798         if (tmp & DMA1_BUSY)
3799                 reset_mask |= RADEON_RESET_DMA1;
3800
3801         /* SRBM_STATUS */
3802         tmp = RREG32(SRBM_STATUS);
3803
3804         if (tmp & IH_BUSY)
3805                 reset_mask |= RADEON_RESET_IH;
3806
3807         if (tmp & SEM_BUSY)
3808                 reset_mask |= RADEON_RESET_SEM;
3809
3810         if (tmp & GRBM_RQ_PENDING)
3811                 reset_mask |= RADEON_RESET_GRBM;
3812
3813         if (tmp & VMC_BUSY)
3814                 reset_mask |= RADEON_RESET_VMC;
3815
3816         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3817                    MCC_BUSY | MCD_BUSY))
3818                 reset_mask |= RADEON_RESET_MC;
3819
3820         if (evergreen_is_display_hung(rdev))
3821                 reset_mask |= RADEON_RESET_DISPLAY;
3822
3823         /* VM_L2_STATUS */
3824         tmp = RREG32(VM_L2_STATUS);
3825         if (tmp & L2_BUSY)
3826                 reset_mask |= RADEON_RESET_VMC;
3827
3828         /* Skip MC reset as it's mostly likely not hung, just busy */
3829         if (reset_mask & RADEON_RESET_MC) {
3830                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3831                 reset_mask &= ~RADEON_RESET_MC;
3832         }
3833
3834         return reset_mask;
3835 }
3836
3837 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3838 {
3839         struct evergreen_mc_save save;
3840         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3841         u32 tmp;
3842
3843         if (reset_mask == 0)
3844                 return;
3845
3846         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3847
3848         evergreen_print_gpu_status_regs(rdev);
3849         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3850                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3851         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3852                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3853
3854         /* disable PG/CG */
3855         si_fini_pg(rdev);
3856         si_fini_cg(rdev);
3857
3858         /* stop the rlc */
3859         si_rlc_stop(rdev);
3860
3861         /* Disable CP parsing/prefetching */
3862         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3863
3864         if (reset_mask & RADEON_RESET_DMA) {
3865                 /* dma0 */
3866                 tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3867                 tmp &= ~DMA_RB_ENABLE;
3868                 WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3869         }
3870         if (reset_mask & RADEON_RESET_DMA1) {
3871                 /* dma1 */
3872                 tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3873                 tmp &= ~DMA_RB_ENABLE;
3874                 WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3875         }
3876
3877         udelay(50);
3878
3879         evergreen_mc_stop(rdev, &save);
3880         if (evergreen_mc_wait_for_idle(rdev)) {
3881                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3882         }
3883
3884         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3885                 grbm_soft_reset = SOFT_RESET_CB |
3886                         SOFT_RESET_DB |
3887                         SOFT_RESET_GDS |
3888                         SOFT_RESET_PA |
3889                         SOFT_RESET_SC |
3890                         SOFT_RESET_BCI |
3891                         SOFT_RESET_SPI |
3892                         SOFT_RESET_SX |
3893                         SOFT_RESET_TC |
3894                         SOFT_RESET_TA |
3895                         SOFT_RESET_VGT |
3896                         SOFT_RESET_IA;
3897         }
3898
3899         if (reset_mask & RADEON_RESET_CP) {
3900                 grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3901
3902                 srbm_soft_reset |= SOFT_RESET_GRBM;
3903         }
3904
3905         if (reset_mask & RADEON_RESET_DMA)
3906                 srbm_soft_reset |= SOFT_RESET_DMA;
3907
3908         if (reset_mask & RADEON_RESET_DMA1)
3909                 srbm_soft_reset |= SOFT_RESET_DMA1;
3910
3911         if (reset_mask & RADEON_RESET_DISPLAY)
3912                 srbm_soft_reset |= SOFT_RESET_DC;
3913
3914         if (reset_mask & RADEON_RESET_RLC)
3915                 grbm_soft_reset |= SOFT_RESET_RLC;
3916
3917         if (reset_mask & RADEON_RESET_SEM)
3918                 srbm_soft_reset |= SOFT_RESET_SEM;
3919
3920         if (reset_mask & RADEON_RESET_IH)
3921                 srbm_soft_reset |= SOFT_RESET_IH;
3922
3923         if (reset_mask & RADEON_RESET_GRBM)
3924                 srbm_soft_reset |= SOFT_RESET_GRBM;
3925
3926         if (reset_mask & RADEON_RESET_VMC)
3927                 srbm_soft_reset |= SOFT_RESET_VMC;
3928
3929         if (reset_mask & RADEON_RESET_MC)
3930                 srbm_soft_reset |= SOFT_RESET_MC;
3931
3932         if (grbm_soft_reset) {
3933                 tmp = RREG32(GRBM_SOFT_RESET);
3934                 tmp |= grbm_soft_reset;
3935                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3936                 WREG32(GRBM_SOFT_RESET, tmp);
3937                 tmp = RREG32(GRBM_SOFT_RESET);
3938
3939                 udelay(50);
3940
3941                 tmp &= ~grbm_soft_reset;
3942                 WREG32(GRBM_SOFT_RESET, tmp);
3943                 tmp = RREG32(GRBM_SOFT_RESET);
3944         }
3945
3946         if (srbm_soft_reset) {
3947                 tmp = RREG32(SRBM_SOFT_RESET);
3948                 tmp |= srbm_soft_reset;
3949                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3950                 WREG32(SRBM_SOFT_RESET, tmp);
3951                 tmp = RREG32(SRBM_SOFT_RESET);
3952
3953                 udelay(50);
3954
3955                 tmp &= ~srbm_soft_reset;
3956                 WREG32(SRBM_SOFT_RESET, tmp);
3957                 tmp = RREG32(SRBM_SOFT_RESET);
3958         }
3959
3960         /* Wait a little for things to settle down */
3961         udelay(50);
3962
3963         evergreen_mc_resume(rdev, &save);
3964         udelay(50);
3965
3966         evergreen_print_gpu_status_regs(rdev);
3967 }
3968
3969 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3970 {
3971         u32 tmp, i;
3972
3973         tmp = RREG32(CG_SPLL_FUNC_CNTL);
3974         tmp |= SPLL_BYPASS_EN;
3975         WREG32(CG_SPLL_FUNC_CNTL, tmp);
3976
3977         tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3978         tmp |= SPLL_CTLREQ_CHG;
3979         WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3980
3981         for (i = 0; i < rdev->usec_timeout; i++) {
3982                 if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
3983                         break;
3984                 udelay(1);
3985         }
3986
3987         tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3988         tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
3989         WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3990
3991         tmp = RREG32(MPLL_CNTL_MODE);
3992         tmp &= ~MPLL_MCLK_SEL;
3993         WREG32(MPLL_CNTL_MODE, tmp);
3994 }
3995
3996 static void si_spll_powerdown(struct radeon_device *rdev)
3997 {
3998         u32 tmp;
3999
4000         tmp = RREG32(SPLL_CNTL_MODE);
4001         tmp |= SPLL_SW_DIR_CONTROL;
4002         WREG32(SPLL_CNTL_MODE, tmp);
4003
4004         tmp = RREG32(CG_SPLL_FUNC_CNTL);
4005         tmp |= SPLL_RESET;
4006         WREG32(CG_SPLL_FUNC_CNTL, tmp);
4007
4008         tmp = RREG32(CG_SPLL_FUNC_CNTL);
4009         tmp |= SPLL_SLEEP;
4010         WREG32(CG_SPLL_FUNC_CNTL, tmp);
4011
4012         tmp = RREG32(SPLL_CNTL_MODE);
4013         tmp &= ~SPLL_SW_DIR_CONTROL;
4014         WREG32(SPLL_CNTL_MODE, tmp);
4015 }
4016
4017 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
4018 {
4019         struct evergreen_mc_save save;
4020         u32 tmp, i;
4021
4022         dev_info(rdev->dev, "GPU pci config reset\n");
4023
4024         /* disable dpm? */
4025
4026         /* disable cg/pg */
4027         si_fini_pg(rdev);
4028         si_fini_cg(rdev);
4029
4030         /* Disable CP parsing/prefetching */
4031         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4032         /* dma0 */
4033         tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
4034         tmp &= ~DMA_RB_ENABLE;
4035         WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
4036         /* dma1 */
4037         tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
4038         tmp &= ~DMA_RB_ENABLE;
4039         WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
4040         /* XXX other engines? */
4041
4042         /* halt the rlc, disable cp internal ints */
4043         si_rlc_stop(rdev);
4044
4045         udelay(50);
4046
4047         /* disable mem access */
4048         evergreen_mc_stop(rdev, &save);
4049         if (evergreen_mc_wait_for_idle(rdev)) {
4050                 dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
4051         }
4052
4053         /* set mclk/sclk to bypass */
4054         si_set_clk_bypass_mode(rdev);
4055         /* powerdown spll */
4056         si_spll_powerdown(rdev);
4057         /* disable BM */
4058         pci_clear_master(rdev->pdev);
4059         /* reset */
4060         radeon_pci_config_reset(rdev);
4061         /* wait for asic to come out of reset */
4062         for (i = 0; i < rdev->usec_timeout; i++) {
4063                 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
4064                         break;
4065                 udelay(1);
4066         }
4067 }
4068
4069 int si_asic_reset(struct radeon_device *rdev, bool hard)
4070 {
4071         u32 reset_mask;
4072
4073         if (hard) {
4074                 si_gpu_pci_config_reset(rdev);
4075                 return 0;
4076         }
4077
4078         reset_mask = si_gpu_check_soft_reset(rdev);
4079
4080         if (reset_mask)
4081                 r600_set_bios_scratch_engine_hung(rdev, true);
4082
4083         /* try soft reset */
4084         si_gpu_soft_reset(rdev, reset_mask);
4085
4086         reset_mask = si_gpu_check_soft_reset(rdev);
4087
4088         /* try pci config reset */
4089         if (reset_mask && radeon_hard_reset)
4090                 si_gpu_pci_config_reset(rdev);
4091
4092         reset_mask = si_gpu_check_soft_reset(rdev);
4093
4094         if (!reset_mask)
4095                 r600_set_bios_scratch_engine_hung(rdev, false);
4096
4097         return 0;
4098 }
4099
4100 /**
4101  * si_gfx_is_lockup - Check if the GFX engine is locked up
4102  *
4103  * @rdev: radeon_device pointer
4104  * @ring: radeon_ring structure holding ring information
4105  *
4106  * Check if the GFX engine is locked up.
4107  * Returns true if the engine appears to be locked up, false if not.
4108  */
4109 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4110 {
4111         u32 reset_mask = si_gpu_check_soft_reset(rdev);
4112
4113         if (!(reset_mask & (RADEON_RESET_GFX |
4114                             RADEON_RESET_COMPUTE |
4115                             RADEON_RESET_CP))) {
4116                 radeon_ring_lockup_update(rdev, ring);
4117                 return false;
4118         }
4119         return radeon_ring_test_lockup(rdev, ring);
4120 }
4121
4122 /* MC */
4123 static void si_mc_program(struct radeon_device *rdev)
4124 {
4125         struct evergreen_mc_save save;
4126         u32 tmp;
4127         int i, j;
4128
4129         /* Initialize HDP */
4130         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4131                 WREG32((0x2c14 + j), 0x00000000);
4132                 WREG32((0x2c18 + j), 0x00000000);
4133                 WREG32((0x2c1c + j), 0x00000000);
4134                 WREG32((0x2c20 + j), 0x00000000);
4135                 WREG32((0x2c24 + j), 0x00000000);
4136         }
4137         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4138
4139         evergreen_mc_stop(rdev, &save);
4140         if (radeon_mc_wait_for_idle(rdev)) {
4141                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4142         }
4143         if (!ASIC_IS_NODCE(rdev))
4144                 /* Lockout access through VGA aperture*/
4145                 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4146         /* Update configuration */
4147         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4148                rdev->mc.vram_start >> 12);
4149         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4150                rdev->mc.vram_end >> 12);
4151         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4152                rdev->vram_scratch.gpu_addr >> 12);
4153         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4154         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4155         WREG32(MC_VM_FB_LOCATION, tmp);
4156         /* XXX double check these! */
4157         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4158         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4159         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4160         WREG32(MC_VM_AGP_BASE, 0);
4161         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4162         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4163         if (radeon_mc_wait_for_idle(rdev)) {
4164                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4165         }
4166         evergreen_mc_resume(rdev, &save);
4167         if (!ASIC_IS_NODCE(rdev)) {
4168                 /* we need to own VRAM, so turn off the VGA renderer here
4169                  * to stop it overwriting our objects */
4170                 rv515_vga_render_disable(rdev);
4171         }
4172 }
4173
4174 void si_vram_gtt_location(struct radeon_device *rdev,
4175                           struct radeon_mc *mc)
4176 {
4177         if (mc->mc_vram_size > 0xFFC0000000ULL) {
4178                 /* leave room for at least 1024M GTT */
4179                 dev_warn(rdev->dev, "limiting VRAM\n");
4180                 mc->real_vram_size = 0xFFC0000000ULL;
4181                 mc->mc_vram_size = 0xFFC0000000ULL;
4182         }
4183         radeon_vram_location(rdev, &rdev->mc, 0);
4184         rdev->mc.gtt_base_align = 0;
4185         radeon_gtt_location(rdev, mc);
4186 }
4187
4188 static int si_mc_init(struct radeon_device *rdev)
4189 {
4190         u32 tmp;
4191         int chansize, numchan;
4192
4193         /* Get VRAM informations */
4194         rdev->mc.vram_is_ddr = true;
4195         tmp = RREG32(MC_ARB_RAMCFG);
4196         if (tmp & CHANSIZE_OVERRIDE) {
4197                 chansize = 16;
4198         } else if (tmp & CHANSIZE_MASK) {
4199                 chansize = 64;
4200         } else {
4201                 chansize = 32;
4202         }
4203         tmp = RREG32(MC_SHARED_CHMAP);
4204         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4205         case 0:
4206         default:
4207                 numchan = 1;
4208                 break;
4209         case 1:
4210                 numchan = 2;
4211                 break;
4212         case 2:
4213                 numchan = 4;
4214                 break;
4215         case 3:
4216                 numchan = 8;
4217                 break;
4218         case 4:
4219                 numchan = 3;
4220                 break;
4221         case 5:
4222                 numchan = 6;
4223                 break;
4224         case 6:
4225                 numchan = 10;
4226                 break;
4227         case 7:
4228                 numchan = 12;
4229                 break;
4230         case 8:
4231                 numchan = 16;
4232                 break;
4233         }
4234         rdev->mc.vram_width = numchan * chansize;
4235         /* Could aper size report 0 ? */
4236         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4237         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4238         /* size in MB on si */
4239         tmp = RREG32(CONFIG_MEMSIZE);
4240         /* some boards may have garbage in the upper 16 bits */
4241         if (tmp & 0xffff0000) {
4242                 DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4243                 if (tmp & 0xffff)
4244                         tmp &= 0xffff;
4245         }
4246         rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4247         rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4248         rdev->mc.visible_vram_size = rdev->mc.aper_size;
4249         si_vram_gtt_location(rdev, &rdev->mc);
4250         radeon_update_bandwidth_info(rdev);
4251
4252         return 0;
4253 }
4254
4255 /*
4256  * GART
4257  */
4258 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4259 {
4260         /* flush hdp cache */
4261         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4262
4263         /* bits 0-15 are the VM contexts0-15 */
4264         WREG32(VM_INVALIDATE_REQUEST, 1);
4265 }
4266
4267 static int si_pcie_gart_enable(struct radeon_device *rdev)
4268 {
4269         int r, i;
4270
4271         if (rdev->gart.robj == NULL) {
4272                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4273                 return -EINVAL;
4274         }
4275         r = radeon_gart_table_vram_pin(rdev);
4276         if (r)
4277                 return r;
4278         /* Setup TLB control */
4279         WREG32(MC_VM_MX_L1_TLB_CNTL,
4280                (0xA << 7) |
4281                ENABLE_L1_TLB |
4282                ENABLE_L1_FRAGMENT_PROCESSING |
4283                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4284                ENABLE_ADVANCED_DRIVER_MODEL |
4285                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4286         /* Setup L2 cache */
4287         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4288                ENABLE_L2_FRAGMENT_PROCESSING |
4289                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4290                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4291                EFFECTIVE_L2_QUEUE_SIZE(7) |
4292                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4293         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4294         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4295                BANK_SELECT(4) |
4296                L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4297         /* setup context0 */
4298         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4299         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4300         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4301         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4302                         (u32)(rdev->dummy_page.addr >> 12));
4303         WREG32(VM_CONTEXT0_CNTL2, 0);
4304         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4305                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4306
4307         WREG32(0x15D4, 0);
4308         WREG32(0x15D8, 0);
4309         WREG32(0x15DC, 0);
4310
4311         /* empty context1-15 */
4312         /* set vm size, must be a multiple of 4 */
4313         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4314         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
4315         /* Assign the pt base to something valid for now; the pts used for
4316          * the VMs are determined by the application and setup and assigned
4317          * on the fly in the vm part of radeon_gart.c
4318          */
4319         for (i = 1; i < 16; i++) {
4320                 if (i < 8)
4321                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4322                                rdev->vm_manager.saved_table_addr[i]);
4323                 else
4324                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4325                                rdev->vm_manager.saved_table_addr[i]);
4326         }
4327
4328         /* enable context1-15 */
4329         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4330                (u32)(rdev->dummy_page.addr >> 12));
4331         WREG32(VM_CONTEXT1_CNTL2, 4);
4332         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4333                                 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4334                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4335                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4336                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4337                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4338                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4339                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4340                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4341                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4342                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4343                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4344                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4345                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4346
4347         si_pcie_gart_tlb_flush(rdev);
4348         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4349                  (unsigned)(rdev->mc.gtt_size >> 20),
4350                  (unsigned long long)rdev->gart.table_addr);
4351         rdev->gart.ready = true;
4352         return 0;
4353 }
4354
4355 static void si_pcie_gart_disable(struct radeon_device *rdev)
4356 {
4357         unsigned i;
4358
4359         for (i = 1; i < 16; ++i) {
4360                 uint32_t reg;
4361                 if (i < 8)
4362                         reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4363                 else
4364                         reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4365                 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4366         }
4367
4368         /* Disable all tables */
4369         WREG32(VM_CONTEXT0_CNTL, 0);
4370         WREG32(VM_CONTEXT1_CNTL, 0);
4371         /* Setup TLB control */
4372         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4373                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4374         /* Setup L2 cache */
4375         WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4376                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4377                EFFECTIVE_L2_QUEUE_SIZE(7) |
4378                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4379         WREG32(VM_L2_CNTL2, 0);
4380         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4381                L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4382         radeon_gart_table_vram_unpin(rdev);
4383 }
4384
/**
 * si_pcie_gart_fini - tear down the PCIE GART (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART through si_pcie_gart_disable(), then calls
 * radeon_gart_table_vram_free() and radeon_gart_fini() in that order.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
        /* stop the hardware from using the table before it is released */
        si_pcie_gart_disable(rdev);
        radeon_gart_table_vram_free(rdev);
        radeon_gart_fini(rdev);
}
4391
4392 /* vm parser */
4393 static bool si_vm_reg_valid(u32 reg)
4394 {
4395         /* context regs are fine */
4396         if (reg >= 0x28000)
4397                 return true;
4398
4399         /* shader regs are also fine */
4400         if (reg >= 0xB000 && reg < 0xC000)
4401                 return true;
4402
4403         /* check config regs */
4404         switch (reg) {
4405         case GRBM_GFX_INDEX:
4406         case CP_STRMOUT_CNTL:
4407         case VGT_VTX_VECT_EJECT_REG:
4408         case VGT_CACHE_INVALIDATION:
4409         case VGT_ESGS_RING_SIZE:
4410         case VGT_GSVS_RING_SIZE:
4411         case VGT_GS_VERTEX_REUSE:
4412         case VGT_PRIMITIVE_TYPE:
4413         case VGT_INDEX_TYPE:
4414         case VGT_NUM_INDICES:
4415         case VGT_NUM_INSTANCES:
4416         case VGT_TF_RING_SIZE:
4417         case VGT_HS_OFFCHIP_PARAM:
4418         case VGT_TF_MEMORY_BASE:
4419         case PA_CL_ENHANCE:
4420         case PA_SU_LINE_STIPPLE_VALUE:
4421         case PA_SC_LINE_STIPPLE_STATE:
4422         case PA_SC_ENHANCE:
4423         case SQC_CACHES:
4424         case SPI_STATIC_THREAD_MGMT_1:
4425         case SPI_STATIC_THREAD_MGMT_2:
4426         case SPI_STATIC_THREAD_MGMT_3:
4427         case SPI_PS_MAX_WAVE_ID:
4428         case SPI_CONFIG_CNTL:
4429         case SPI_CONFIG_CNTL_1:
4430         case TA_CNTL_AUX:
4431         case TA_CS_BC_BASE_ADDR:
4432                 return true;
4433         default:
4434                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4435                 return false;
4436         }
4437 }
4438
4439 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4440                                   u32 *ib, struct radeon_cs_packet *pkt)
4441 {
4442         switch (pkt->opcode) {
4443         case PACKET3_NOP:
4444         case PACKET3_SET_BASE:
4445         case PACKET3_SET_CE_DE_COUNTERS:
4446         case PACKET3_LOAD_CONST_RAM:
4447         case PACKET3_WRITE_CONST_RAM:
4448         case PACKET3_WRITE_CONST_RAM_OFFSET:
4449         case PACKET3_DUMP_CONST_RAM:
4450         case PACKET3_INCREMENT_CE_COUNTER:
4451         case PACKET3_WAIT_ON_DE_COUNTER:
4452         case PACKET3_CE_WRITE:
4453                 break;
4454         default:
4455                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4456                 return -EINVAL;
4457         }
4458         return 0;
4459 }
4460
4461 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4462 {
4463         u32 start_reg, reg, i;
4464         u32 command = ib[idx + 4];
4465         u32 info = ib[idx + 1];
4466         u32 idx_value = ib[idx];
4467         if (command & PACKET3_CP_DMA_CMD_SAS) {
4468                 /* src address space is register */
4469                 if (((info & 0x60000000) >> 29) == 0) {
4470                         start_reg = idx_value << 2;
4471                         if (command & PACKET3_CP_DMA_CMD_SAIC) {
4472                                 reg = start_reg;
4473                                 if (!si_vm_reg_valid(reg)) {
4474                                         DRM_ERROR("CP DMA Bad SRC register\n");
4475                                         return -EINVAL;
4476                                 }
4477                         } else {
4478                                 for (i = 0; i < (command & 0x1fffff); i++) {
4479                                         reg = start_reg + (4 * i);
4480                                         if (!si_vm_reg_valid(reg)) {
4481                                                 DRM_ERROR("CP DMA Bad SRC register\n");
4482                                                 return -EINVAL;
4483                                         }
4484                                 }
4485                         }
4486                 }
4487         }
4488         if (command & PACKET3_CP_DMA_CMD_DAS) {
4489                 /* dst address space is register */
4490                 if (((info & 0x00300000) >> 20) == 0) {
4491                         start_reg = ib[idx + 2];
4492                         if (command & PACKET3_CP_DMA_CMD_DAIC) {
4493                                 reg = start_reg;
4494                                 if (!si_vm_reg_valid(reg)) {
4495                                         DRM_ERROR("CP DMA Bad DST register\n");
4496                                         return -EINVAL;
4497                                 }
4498                         } else {
4499                                 for (i = 0; i < (command & 0x1fffff); i++) {
4500                                         reg = start_reg + (4 * i);
4501                                 if (!si_vm_reg_valid(reg)) {
4502                                                 DRM_ERROR("CP DMA Bad DST register\n");
4503                                                 return -EINVAL;
4504                                         }
4505                                 }
4506                         }
4507                 }
4508         }
4509         return 0;
4510 }
4511
4512 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4513                                    u32 *ib, struct radeon_cs_packet *pkt)
4514 {
4515         int r;
4516         u32 idx = pkt->idx + 1;
4517         u32 idx_value = ib[idx];
4518         u32 start_reg, end_reg, reg, i;
4519
4520         switch (pkt->opcode) {
4521         case PACKET3_NOP:
4522         case PACKET3_SET_BASE:
4523         case PACKET3_CLEAR_STATE:
4524         case PACKET3_INDEX_BUFFER_SIZE:
4525         case PACKET3_DISPATCH_DIRECT:
4526         case PACKET3_DISPATCH_INDIRECT:
4527         case PACKET3_ALLOC_GDS:
4528         case PACKET3_WRITE_GDS_RAM:
4529         case PACKET3_ATOMIC_GDS:
4530         case PACKET3_ATOMIC:
4531         case PACKET3_OCCLUSION_QUERY:
4532         case PACKET3_SET_PREDICATION:
4533         case PACKET3_COND_EXEC:
4534         case PACKET3_PRED_EXEC:
4535         case PACKET3_DRAW_INDIRECT:
4536         case PACKET3_DRAW_INDEX_INDIRECT:
4537         case PACKET3_INDEX_BASE:
4538         case PACKET3_DRAW_INDEX_2:
4539         case PACKET3_CONTEXT_CONTROL:
4540         case PACKET3_INDEX_TYPE:
4541         case PACKET3_DRAW_INDIRECT_MULTI:
4542         case PACKET3_DRAW_INDEX_AUTO:
4543         case PACKET3_DRAW_INDEX_IMMD:
4544         case PACKET3_NUM_INSTANCES:
4545         case PACKET3_DRAW_INDEX_MULTI_AUTO:
4546         case PACKET3_STRMOUT_BUFFER_UPDATE:
4547         case PACKET3_DRAW_INDEX_OFFSET_2:
4548         case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4549         case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4550         case PACKET3_MPEG_INDEX:
4551         case PACKET3_WAIT_REG_MEM:
4552         case PACKET3_MEM_WRITE:
4553         case PACKET3_PFP_SYNC_ME:
4554         case PACKET3_SURFACE_SYNC:
4555         case PACKET3_EVENT_WRITE:
4556         case PACKET3_EVENT_WRITE_EOP:
4557         case PACKET3_EVENT_WRITE_EOS:
4558         case PACKET3_SET_CONTEXT_REG:
4559         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4560         case PACKET3_SET_SH_REG:
4561         case PACKET3_SET_SH_REG_OFFSET:
4562         case PACKET3_INCREMENT_DE_COUNTER:
4563         case PACKET3_WAIT_ON_CE_COUNTER:
4564         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4565         case PACKET3_ME_WRITE:
4566                 break;
4567         case PACKET3_COPY_DATA:
4568                 if ((idx_value & 0xf00) == 0) {
4569                         reg = ib[idx + 3] * 4;
4570                         if (!si_vm_reg_valid(reg))
4571                                 return -EINVAL;
4572                 }
4573                 break;
4574         case PACKET3_WRITE_DATA:
4575                 if ((idx_value & 0xf00) == 0) {
4576                         start_reg = ib[idx + 1] * 4;
4577                         if (idx_value & 0x10000) {
4578                                 if (!si_vm_reg_valid(start_reg))
4579                                         return -EINVAL;
4580                         } else {
4581                                 for (i = 0; i < (pkt->count - 2); i++) {
4582                                         reg = start_reg + (4 * i);
4583                                         if (!si_vm_reg_valid(reg))
4584                                                 return -EINVAL;
4585                                 }
4586                         }
4587                 }
4588                 break;
4589         case PACKET3_COND_WRITE:
4590                 if (idx_value & 0x100) {
4591                         reg = ib[idx + 5] * 4;
4592                         if (!si_vm_reg_valid(reg))
4593                                 return -EINVAL;
4594                 }
4595                 break;
4596         case PACKET3_COPY_DW:
4597                 if (idx_value & 0x2) {
4598                         reg = ib[idx + 3] * 4;
4599                         if (!si_vm_reg_valid(reg))
4600                                 return -EINVAL;
4601                 }
4602                 break;
4603         case PACKET3_SET_CONFIG_REG:
4604                 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4605                 end_reg = 4 * pkt->count + start_reg - 4;
4606                 if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4607                     (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4608                     (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4609                         DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4610                         return -EINVAL;
4611                 }
4612                 for (i = 0; i < pkt->count; i++) {
4613                         reg = start_reg + (4 * i);
4614                         if (!si_vm_reg_valid(reg))
4615                                 return -EINVAL;
4616                 }
4617                 break;
4618         case PACKET3_CP_DMA:
4619                 r = si_vm_packet3_cp_dma_check(ib, idx);
4620                 if (r)
4621                         return r;
4622                 break;
4623         default:
4624                 DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4625                 return -EINVAL;
4626         }
4627         return 0;
4628 }
4629
4630 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4631                                        u32 *ib, struct radeon_cs_packet *pkt)
4632 {
4633         int r;
4634         u32 idx = pkt->idx + 1;
4635         u32 idx_value = ib[idx];
4636         u32 start_reg, reg, i;
4637
4638         switch (pkt->opcode) {
4639         case PACKET3_NOP:
4640         case PACKET3_SET_BASE:
4641         case PACKET3_CLEAR_STATE:
4642         case PACKET3_DISPATCH_DIRECT:
4643         case PACKET3_DISPATCH_INDIRECT:
4644         case PACKET3_ALLOC_GDS:
4645         case PACKET3_WRITE_GDS_RAM:
4646         case PACKET3_ATOMIC_GDS:
4647         case PACKET3_ATOMIC:
4648         case PACKET3_OCCLUSION_QUERY:
4649         case PACKET3_SET_PREDICATION:
4650         case PACKET3_COND_EXEC:
4651         case PACKET3_PRED_EXEC:
4652         case PACKET3_CONTEXT_CONTROL:
4653         case PACKET3_STRMOUT_BUFFER_UPDATE:
4654         case PACKET3_WAIT_REG_MEM:
4655         case PACKET3_MEM_WRITE:
4656         case PACKET3_PFP_SYNC_ME:
4657         case PACKET3_SURFACE_SYNC:
4658         case PACKET3_EVENT_WRITE:
4659         case PACKET3_EVENT_WRITE_EOP:
4660         case PACKET3_EVENT_WRITE_EOS:
4661         case PACKET3_SET_CONTEXT_REG:
4662         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4663         case PACKET3_SET_SH_REG:
4664         case PACKET3_SET_SH_REG_OFFSET:
4665         case PACKET3_INCREMENT_DE_COUNTER:
4666         case PACKET3_WAIT_ON_CE_COUNTER:
4667         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4668         case PACKET3_ME_WRITE:
4669                 break;
4670         case PACKET3_COPY_DATA:
4671                 if ((idx_value & 0xf00) == 0) {
4672                         reg = ib[idx + 3] * 4;
4673                         if (!si_vm_reg_valid(reg))
4674                                 return -EINVAL;
4675                 }
4676                 break;
4677         case PACKET3_WRITE_DATA:
4678                 if ((idx_value & 0xf00) == 0) {
4679                         start_reg = ib[idx + 1] * 4;
4680                         if (idx_value & 0x10000) {
4681                                 if (!si_vm_reg_valid(start_reg))
4682                                         return -EINVAL;
4683                         } else {
4684                                 for (i = 0; i < (pkt->count - 2); i++) {
4685                                         reg = start_reg + (4 * i);
4686                                         if (!si_vm_reg_valid(reg))
4687                                                 return -EINVAL;
4688                                 }
4689                         }
4690                 }
4691                 break;
4692         case PACKET3_COND_WRITE:
4693                 if (idx_value & 0x100) {
4694                         reg = ib[idx + 5] * 4;
4695                         if (!si_vm_reg_valid(reg))
4696                                 return -EINVAL;
4697                 }
4698                 break;
4699         case PACKET3_COPY_DW:
4700                 if (idx_value & 0x2) {
4701                         reg = ib[idx + 3] * 4;
4702                         if (!si_vm_reg_valid(reg))
4703                                 return -EINVAL;
4704                 }
4705                 break;
4706         case PACKET3_CP_DMA:
4707                 r = si_vm_packet3_cp_dma_check(ib, idx);
4708                 if (r)
4709                         return r;
4710                 break;
4711         default:
4712                 DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4713                 return -EINVAL;
4714         }
4715         return 0;
4716 }
4717
4718 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4719 {
4720         int ret = 0;
4721         u32 idx = 0, i;
4722         struct radeon_cs_packet pkt;
4723
4724         do {
4725                 pkt.idx = idx;
4726                 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4727                 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4728                 pkt.one_reg_wr = 0;
4729                 switch (pkt.type) {
4730                 case RADEON_PACKET_TYPE0:
4731                         dev_err(rdev->dev, "Packet0 not allowed!\n");
4732                         ret = -EINVAL;
4733                         break;
4734                 case RADEON_PACKET_TYPE2:
4735                         idx += 1;
4736                         break;
4737                 case RADEON_PACKET_TYPE3:
4738                         pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4739                         if (ib->is_const_ib)
4740                                 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4741                         else {
4742                                 switch (ib->ring) {
4743                                 case RADEON_RING_TYPE_GFX_INDEX:
4744                                         ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4745                                         break;
4746                                 case CAYMAN_RING_TYPE_CP1_INDEX:
4747                                 case CAYMAN_RING_TYPE_CP2_INDEX:
4748                                         ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4749                                         break;
4750                                 default:
4751                                         dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4752                                         ret = -EINVAL;
4753                                         break;
4754                                 }
4755                         }
4756                         idx += pkt.count + 2;
4757                         break;
4758                 default:
4759                         dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4760                         ret = -EINVAL;
4761                         break;
4762                 }
4763                 if (ret) {
4764                         for (i = 0; i < ib->length_dw; i++) {
4765                                 if (i == idx)
4766                                         printk("\t0x%08x <---\n", ib->ptr[i]);
4767                                 else
4768                                         printk("\t0x%08x\n", ib->ptr[i]);
4769                         }
4770                         break;
4771                 }
4772         } while (idx < ib->length_dw);
4773
4774         return ret;
4775 }
4776
4777 /*
4778  * vm
4779  */
/**
 * si_vm_init - set the VM manager parameters (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Sets the number of VMs to 16 and the VRAM base offset to 0.
 * Always returns 0.
 */
int si_vm_init(struct radeon_device *rdev)
{
        /* number of VMs */
        rdev->vm_manager.nvm = 16;
        /* base offset of vram pages */
        rdev->vm_manager.vram_base_offset = 0;

        return 0;
}
4789
/**
 * si_vm_fini - VM manager teardown hook (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Currently a no-op: the function body is empty.
 */
void si_vm_fini(struct radeon_device *rdev)
{
}
4793
4794 /**
4795  * si_vm_decode_fault - print human readable fault info
4796  *
4797  * @rdev: radeon_device pointer
4798  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4799  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4800  *
4801  * Print human readable fault information (SI).
4802  */
4803 static void si_vm_decode_fault(struct radeon_device *rdev,
4804                                u32 status, u32 addr)
4805 {
4806         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4807         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4808         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4809         char *block;
4810
4811         if (rdev->family == CHIP_TAHITI) {
4812                 switch (mc_id) {
4813                 case 160:
4814                 case 144:
4815                 case 96:
4816                 case 80:
4817                 case 224:
4818                 case 208:
4819                 case 32:
4820                 case 16:
4821                         block = "CB";
4822                         break;
4823                 case 161:
4824                 case 145:
4825                 case 97:
4826                 case 81:
4827                 case 225:
4828                 case 209:
4829                 case 33:
4830                 case 17:
4831                         block = "CB_FMASK";
4832                         break;
4833                 case 162:
4834                 case 146:
4835                 case 98:
4836                 case 82:
4837                 case 226:
4838                 case 210:
4839                 case 34:
4840                 case 18:
4841                         block = "CB_CMASK";
4842                         break;
4843                 case 163:
4844                 case 147:
4845                 case 99:
4846                 case 83:
4847                 case 227:
4848                 case 211:
4849                 case 35:
4850                 case 19:
4851                         block = "CB_IMMED";
4852                         break;
4853                 case 164:
4854                 case 148:
4855                 case 100:
4856                 case 84:
4857                 case 228:
4858                 case 212:
4859                 case 36:
4860                 case 20:
4861                         block = "DB";
4862                         break;
4863                 case 165:
4864                 case 149:
4865                 case 101:
4866                 case 85:
4867                 case 229:
4868                 case 213:
4869                 case 37:
4870                 case 21:
4871                         block = "DB_HTILE";
4872                         break;
4873                 case 167:
4874                 case 151:
4875                 case 103:
4876                 case 87:
4877                 case 231:
4878                 case 215:
4879                 case 39:
4880                 case 23:
4881                         block = "DB_STEN";
4882                         break;
4883                 case 72:
4884                 case 68:
4885                 case 64:
4886                 case 8:
4887                 case 4:
4888                 case 0:
4889                 case 136:
4890                 case 132:
4891                 case 128:
4892                 case 200:
4893                 case 196:
4894                 case 192:
4895                         block = "TC";
4896                         break;
4897                 case 112:
4898                 case 48:
4899                         block = "CP";
4900                         break;
4901                 case 49:
4902                 case 177:
4903                 case 50:
4904                 case 178:
4905                         block = "SH";
4906                         break;
4907                 case 53:
4908                 case 190:
4909                         block = "VGT";
4910                         break;
4911                 case 117:
4912                         block = "IH";
4913                         break;
4914                 case 51:
4915                 case 115:
4916                         block = "RLC";
4917                         break;
4918                 case 119:
4919                 case 183:
4920                         block = "DMA0";
4921                         break;
4922                 case 61:
4923                         block = "DMA1";
4924                         break;
4925                 case 248:
4926                 case 120:
4927                         block = "HDP";
4928                         break;
4929                 default:
4930                         block = "unknown";
4931                         break;
4932                 }
4933         } else {
4934                 switch (mc_id) {
4935                 case 32:
4936                 case 16:
4937                 case 96:
4938                 case 80:
4939                 case 160:
4940                 case 144:
4941                 case 224:
4942                 case 208:
4943                         block = "CB";
4944                         break;
4945                 case 33:
4946                 case 17:
4947                 case 97:
4948                 case 81:
4949                 case 161:
4950                 case 145:
4951                 case 225:
4952                 case 209:
4953                         block = "CB_FMASK";
4954                         break;
4955                 case 34:
4956                 case 18:
4957                 case 98:
4958                 case 82:
4959                 case 162:
4960                 case 146:
4961                 case 226:
4962                 case 210:
4963                         block = "CB_CMASK";
4964                         break;
4965                 case 35:
4966                 case 19:
4967                 case 99:
4968                 case 83:
4969                 case 163:
4970                 case 147:
4971                 case 227:
4972                 case 211:
4973                         block = "CB_IMMED";
4974                         break;
4975                 case 36:
4976                 case 20:
4977                 case 100:
4978                 case 84:
4979                 case 164:
4980                 case 148:
4981                 case 228:
4982                 case 212:
4983                         block = "DB";
4984                         break;
4985                 case 37:
4986                 case 21:
4987                 case 101:
4988                 case 85:
4989                 case 165:
4990                 case 149:
4991                 case 229:
4992                 case 213:
4993                         block = "DB_HTILE";
4994                         break;
4995                 case 39:
4996                 case 23:
4997                 case 103:
4998                 case 87:
4999                 case 167:
5000                 case 151:
5001                 case 231:
5002                 case 215:
5003                         block = "DB_STEN";
5004                         break;
5005                 case 72:
5006                 case 68:
5007                 case 8:
5008                 case 4:
5009                 case 136:
5010                 case 132:
5011                 case 200:
5012                 case 196:
5013                         block = "TC";
5014                         break;
5015                 case 112:
5016                 case 48:
5017                         block = "CP";
5018                         break;
5019                 case 49:
5020                 case 177:
5021                 case 50:
5022                 case 178:
5023                         block = "SH";
5024                         break;
5025                 case 53:
5026                         block = "VGT";
5027                         break;
5028                 case 117:
5029                         block = "IH";
5030                         break;
5031                 case 51:
5032                 case 115:
5033                         block = "RLC";
5034                         break;
5035                 case 119:
5036                 case 183:
5037                         block = "DMA0";
5038                         break;
5039                 case 61:
5040                         block = "DMA1";
5041                         break;
5042                 case 248:
5043                 case 120:
5044                         block = "HDP";
5045                         break;
5046                 default:
5047                         block = "unknown";
5048                         break;
5049                 }
5050         }
5051
5052         printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5053                protections, vmid, addr,
5054                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5055                block, mc_id);
5056 }
5057
5058 void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5059                  unsigned vm_id, uint64_t pd_addr)
5060 {
5061         /* write new base address */
5062         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5063         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5064                                  WRITE_DATA_DST_SEL(0)));
5065
5066         if (vm_id < 8) {
5067                 radeon_ring_write(ring,
5068                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5069         } else {
5070                 radeon_ring_write(ring,
5071                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5072         }
5073         radeon_ring_write(ring, 0);
5074         radeon_ring_write(ring, pd_addr >> 12);
5075
5076         /* flush hdp cache */
5077         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5078         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5079                                  WRITE_DATA_DST_SEL(0)));
5080         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5081         radeon_ring_write(ring, 0);
5082         radeon_ring_write(ring, 0x1);
5083
5084         /* bits 0-15 are the VM contexts0-15 */
5085         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5086         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5087                                  WRITE_DATA_DST_SEL(0)));
5088         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5089         radeon_ring_write(ring, 0);
5090         radeon_ring_write(ring, 1 << vm_id);
5091
5092         /* wait for the invalidate to complete */
5093         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5094         radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
5095                                  WAIT_REG_MEM_ENGINE(0))); /* me */
5096         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5097         radeon_ring_write(ring, 0);
5098         radeon_ring_write(ring, 0); /* ref */
5099         radeon_ring_write(ring, 0); /* mask */
5100         radeon_ring_write(ring, 0x20); /* poll interval */
5101
5102         /* sync PFP to ME, otherwise we might get invalid PFP reads */
5103         radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5104         radeon_ring_write(ring, 0x0);
5105 }
5106
5107 /*
5108  *  Power and clock gating
5109  */
5110 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5111 {
5112         int i;
5113
5114         for (i = 0; i < rdev->usec_timeout; i++) {
5115                 if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5116                         break;
5117                 udelay(1);
5118         }
5119
5120         for (i = 0; i < rdev->usec_timeout; i++) {
5121                 if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5122                         break;
5123                 udelay(1);
5124         }
5125 }
5126
5127 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5128                                          bool enable)
5129 {
5130         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5131         u32 mask;
5132         int i;
5133
5134         if (enable)
5135                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5136         else
5137                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5138         WREG32(CP_INT_CNTL_RING0, tmp);
5139
5140         if (!enable) {
5141                 /* read a gfx register */
5142                 tmp = RREG32(DB_DEPTH_INFO);
5143
5144                 mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5145                 for (i = 0; i < rdev->usec_timeout; i++) {
5146                         if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5147                                 break;
5148                         udelay(1);
5149                 }
5150         }
5151 }
5152
5153 static void si_set_uvd_dcm(struct radeon_device *rdev,
5154                            bool sw_mode)
5155 {
5156         u32 tmp, tmp2;
5157
5158         tmp = RREG32(UVD_CGC_CTRL);
5159         tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5160         tmp |= DCM | CG_DT(1) | CLK_OD(4);
5161
5162         if (sw_mode) {
5163                 tmp &= ~0x7ffff800;
5164                 tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5165         } else {
5166                 tmp |= 0x7ffff800;
5167                 tmp2 = 0;
5168         }
5169
5170         WREG32(UVD_CGC_CTRL, tmp);
5171         WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5172 }
5173
5174 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5175 {
5176         bool hw_mode = true;
5177
5178         if (hw_mode) {
5179                 si_set_uvd_dcm(rdev, false);
5180         } else {
5181                 u32 tmp = RREG32(UVD_CGC_CTRL);
5182                 tmp &= ~DCM;
5183                 WREG32(UVD_CGC_CTRL, tmp);
5184         }
5185 }
5186
5187 static u32 si_halt_rlc(struct radeon_device *rdev)
5188 {
5189         u32 data, orig;
5190
5191         orig = data = RREG32(RLC_CNTL);
5192
5193         if (data & RLC_ENABLE) {
5194                 data &= ~RLC_ENABLE;
5195                 WREG32(RLC_CNTL, data);
5196
5197                 si_wait_for_rlc_serdes(rdev);
5198         }
5199
5200         return orig;
5201 }
5202
5203 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5204 {
5205         u32 tmp;
5206
5207         tmp = RREG32(RLC_CNTL);
5208         if (tmp != rlc)
5209                 WREG32(RLC_CNTL, rlc);
5210 }
5211
5212 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5213 {
5214         u32 data, orig;
5215
5216         orig = data = RREG32(DMA_PG);
5217         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5218                 data |= PG_CNTL_ENABLE;
5219         else
5220                 data &= ~PG_CNTL_ENABLE;
5221         if (orig != data)
5222                 WREG32(DMA_PG, data);
5223 }
5224
5225 static void si_init_dma_pg(struct radeon_device *rdev)
5226 {
5227         u32 tmp;
5228
5229         WREG32(DMA_PGFSM_WRITE,  0x00002000);
5230         WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5231
5232         for (tmp = 0; tmp < 5; tmp++)
5233                 WREG32(DMA_PGFSM_WRITE, 0);
5234 }
5235
5236 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5237                                bool enable)
5238 {
5239         u32 tmp;
5240
5241         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5242                 tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5243                 WREG32(RLC_TTOP_D, tmp);
5244
5245                 tmp = RREG32(RLC_PG_CNTL);
5246                 tmp |= GFX_PG_ENABLE;
5247                 WREG32(RLC_PG_CNTL, tmp);
5248
5249                 tmp = RREG32(RLC_AUTO_PG_CTRL);
5250                 tmp |= AUTO_PG_EN;
5251                 WREG32(RLC_AUTO_PG_CTRL, tmp);
5252         } else {
5253                 tmp = RREG32(RLC_AUTO_PG_CTRL);
5254                 tmp &= ~AUTO_PG_EN;
5255                 WREG32(RLC_AUTO_PG_CTRL, tmp);
5256
5257                 tmp = RREG32(DB_RENDER_CONTROL);
5258         }
5259 }
5260
5261 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5262 {
5263         u32 tmp;
5264
5265         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5266
5267         tmp = RREG32(RLC_PG_CNTL);
5268         tmp |= GFX_PG_SRC;
5269         WREG32(RLC_PG_CNTL, tmp);
5270
5271         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5272
5273         tmp = RREG32(RLC_AUTO_PG_CTRL);
5274
5275         tmp &= ~GRBM_REG_SGIT_MASK;
5276         tmp |= GRBM_REG_SGIT(0x700);
5277         tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5278         WREG32(RLC_AUTO_PG_CTRL, tmp);
5279 }
5280
5281 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5282 {
5283         u32 mask = 0, tmp, tmp1;
5284         int i;
5285
5286         si_select_se_sh(rdev, se, sh);
5287         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5288         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5289         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5290
5291         tmp &= 0xffff0000;
5292
5293         tmp |= tmp1;
5294         tmp >>= 16;
5295
5296         for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5297                 mask <<= 1;
5298                 mask |= 1;
5299         }
5300
5301         return (~tmp) & mask;
5302 }
5303
5304 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5305 {
5306         u32 i, j, k, active_cu_number = 0;
5307         u32 mask, counter, cu_bitmap;
5308         u32 tmp = 0;
5309
5310         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5311                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5312                         mask = 1;
5313                         cu_bitmap = 0;
5314                         counter  = 0;
5315                         for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5316                                 if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5317                                         if (counter < 2)
5318                                                 cu_bitmap |= mask;
5319                                         counter++;
5320                                 }
5321                                 mask <<= 1;
5322                         }
5323
5324                         active_cu_number += counter;
5325                         tmp |= (cu_bitmap << (i * 16 + j * 8));
5326                 }
5327         }
5328
5329         WREG32(RLC_PG_AO_CU_MASK, tmp);
5330
5331         tmp = RREG32(RLC_MAX_PG_CU);
5332         tmp &= ~MAX_PU_CU_MASK;
5333         tmp |= MAX_PU_CU(active_cu_number);
5334         WREG32(RLC_MAX_PG_CU, tmp);
5335 }
5336
5337 static void si_enable_cgcg(struct radeon_device *rdev,
5338                            bool enable)
5339 {
5340         u32 data, orig, tmp;
5341
5342         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5343
5344         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5345                 si_enable_gui_idle_interrupt(rdev, true);
5346
5347                 WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5348
5349                 tmp = si_halt_rlc(rdev);
5350
5351                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5352                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5353                 WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5354
5355                 si_wait_for_rlc_serdes(rdev);
5356
5357                 si_update_rlc(rdev, tmp);
5358
5359                 WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5360
5361                 data |= CGCG_EN | CGLS_EN;
5362         } else {
5363                 si_enable_gui_idle_interrupt(rdev, false);
5364
5365                 RREG32(CB_CGTT_SCLK_CTRL);
5366                 RREG32(CB_CGTT_SCLK_CTRL);
5367                 RREG32(CB_CGTT_SCLK_CTRL);
5368                 RREG32(CB_CGTT_SCLK_CTRL);
5369
5370                 data &= ~(CGCG_EN | CGLS_EN);
5371         }
5372
5373         if (orig != data)
5374                 WREG32(RLC_CGCG_CGLS_CTRL, data);
5375 }
5376
5377 static void si_enable_mgcg(struct radeon_device *rdev,
5378                            bool enable)
5379 {
5380         u32 data, orig, tmp = 0;
5381
5382         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5383                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5384                 data = 0x96940200;
5385                 if (orig != data)
5386                         WREG32(CGTS_SM_CTRL_REG, data);
5387
5388                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5389                         orig = data = RREG32(CP_MEM_SLP_CNTL);
5390                         data |= CP_MEM_LS_EN;
5391                         if (orig != data)
5392                                 WREG32(CP_MEM_SLP_CNTL, data);
5393                 }
5394
5395                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5396                 data &= 0xffffffc0;
5397                 if (orig != data)
5398                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5399
5400                 tmp = si_halt_rlc(rdev);
5401
5402                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5403                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5404                 WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5405
5406                 si_update_rlc(rdev, tmp);
5407         } else {
5408                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5409                 data |= 0x00000003;
5410                 if (orig != data)
5411                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5412
5413                 data = RREG32(CP_MEM_SLP_CNTL);
5414                 if (data & CP_MEM_LS_EN) {
5415                         data &= ~CP_MEM_LS_EN;
5416                         WREG32(CP_MEM_SLP_CNTL, data);
5417                 }
5418                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5419                 data |= LS_OVERRIDE | OVERRIDE;
5420                 if (orig != data)
5421                         WREG32(CGTS_SM_CTRL_REG, data);
5422
5423                 tmp = si_halt_rlc(rdev);
5424
5425                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5426                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5427                 WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5428
5429                 si_update_rlc(rdev, tmp);
5430         }
5431 }
5432
5433 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5434                                bool enable)
5435 {
5436         u32 orig, data, tmp;
5437
5438         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5439                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5440                 tmp |= 0x3fff;
5441                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5442
5443                 orig = data = RREG32(UVD_CGC_CTRL);
5444                 data |= DCM;
5445                 if (orig != data)
5446                         WREG32(UVD_CGC_CTRL, data);
5447
5448                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5449                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5450         } else {
5451                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5452                 tmp &= ~0x3fff;
5453                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5454
5455                 orig = data = RREG32(UVD_CGC_CTRL);
5456                 data &= ~DCM;
5457                 if (orig != data)
5458                         WREG32(UVD_CGC_CTRL, data);
5459
5460                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5461                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5462         }
5463 }
5464
5465 static const u32 mc_cg_registers[] =
5466 {
5467         MC_HUB_MISC_HUB_CG,
5468         MC_HUB_MISC_SIP_CG,
5469         MC_HUB_MISC_VM_CG,
5470         MC_XPB_CLK_GAT,
5471         ATC_MISC_CG,
5472         MC_CITF_MISC_WR_CG,
5473         MC_CITF_MISC_RD_CG,
5474         MC_CITF_MISC_VM_CG,
5475         VM_L2_CG,
5476 };
5477
5478 static void si_enable_mc_ls(struct radeon_device *rdev,
5479                             bool enable)
5480 {
5481         int i;
5482         u32 orig, data;
5483
5484         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5485                 orig = data = RREG32(mc_cg_registers[i]);
5486                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5487                         data |= MC_LS_ENABLE;
5488                 else
5489                         data &= ~MC_LS_ENABLE;
5490                 if (data != orig)
5491                         WREG32(mc_cg_registers[i], data);
5492         }
5493 }
5494
5495 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5496                                bool enable)
5497 {
5498         int i;
5499         u32 orig, data;
5500
5501         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5502                 orig = data = RREG32(mc_cg_registers[i]);
5503                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5504                         data |= MC_CG_ENABLE;
5505                 else
5506                         data &= ~MC_CG_ENABLE;
5507                 if (data != orig)
5508                         WREG32(mc_cg_registers[i], data);
5509         }
5510 }
5511
5512 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5513                                bool enable)
5514 {
5515         u32 orig, data, offset;
5516         int i;
5517
5518         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5519                 for (i = 0; i < 2; i++) {
5520                         if (i == 0)
5521                                 offset = DMA0_REGISTER_OFFSET;
5522                         else
5523                                 offset = DMA1_REGISTER_OFFSET;
5524                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5525                         data &= ~MEM_POWER_OVERRIDE;
5526                         if (data != orig)
5527                                 WREG32(DMA_POWER_CNTL + offset, data);
5528                         WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5529                 }
5530         } else {
5531                 for (i = 0; i < 2; i++) {
5532                         if (i == 0)
5533                                 offset = DMA0_REGISTER_OFFSET;
5534                         else
5535                                 offset = DMA1_REGISTER_OFFSET;
5536                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5537                         data |= MEM_POWER_OVERRIDE;
5538                         if (data != orig)
5539                                 WREG32(DMA_POWER_CNTL + offset, data);
5540
5541                         orig = data = RREG32(DMA_CLK_CTRL + offset);
5542                         data = 0xff000000;
5543                         if (data != orig)
5544                                 WREG32(DMA_CLK_CTRL + offset, data);
5545                 }
5546         }
5547 }
5548
5549 static void si_enable_bif_mgls(struct radeon_device *rdev,
5550                                bool enable)
5551 {
5552         u32 orig, data;
5553
5554         orig = data = RREG32_PCIE(PCIE_CNTL2);
5555
5556         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5557                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5558                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5559         else
5560                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5561                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5562
5563         if (orig != data)
5564                 WREG32_PCIE(PCIE_CNTL2, data);
5565 }
5566
5567 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5568                                bool enable)
5569 {
5570         u32 orig, data;
5571
5572         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5573
5574         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5575                 data &= ~CLOCK_GATING_DIS;
5576         else
5577                 data |= CLOCK_GATING_DIS;
5578
5579         if (orig != data)
5580                 WREG32(HDP_HOST_PATH_CNTL, data);
5581 }
5582
5583 static void si_enable_hdp_ls(struct radeon_device *rdev,
5584                              bool enable)
5585 {
5586         u32 orig, data;
5587
5588         orig = data = RREG32(HDP_MEM_POWER_LS);
5589
5590         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5591                 data |= HDP_LS_ENABLE;
5592         else
5593                 data &= ~HDP_LS_ENABLE;
5594
5595         if (orig != data)
5596                 WREG32(HDP_MEM_POWER_LS, data);
5597 }
5598
5599 static void si_update_cg(struct radeon_device *rdev,
5600                          u32 block, bool enable)
5601 {
5602         if (block & RADEON_CG_BLOCK_GFX) {
5603                 si_enable_gui_idle_interrupt(rdev, false);
5604                 /* order matters! */
5605                 if (enable) {
5606                         si_enable_mgcg(rdev, true);
5607                         si_enable_cgcg(rdev, true);
5608                 } else {
5609                         si_enable_cgcg(rdev, false);
5610                         si_enable_mgcg(rdev, false);
5611                 }
5612                 si_enable_gui_idle_interrupt(rdev, true);
5613         }
5614
5615         if (block & RADEON_CG_BLOCK_MC) {
5616                 si_enable_mc_mgcg(rdev, enable);
5617                 si_enable_mc_ls(rdev, enable);
5618         }
5619
5620         if (block & RADEON_CG_BLOCK_SDMA) {
5621                 si_enable_dma_mgcg(rdev, enable);
5622         }
5623
5624         if (block & RADEON_CG_BLOCK_BIF) {
5625                 si_enable_bif_mgls(rdev, enable);
5626         }
5627
5628         if (block & RADEON_CG_BLOCK_UVD) {
5629                 if (rdev->has_uvd) {
5630                         si_enable_uvd_mgcg(rdev, enable);
5631                 }
5632         }
5633
5634         if (block & RADEON_CG_BLOCK_HDP) {
5635                 si_enable_hdp_mgcg(rdev, enable);
5636                 si_enable_hdp_ls(rdev, enable);
5637         }
5638 }
5639
5640 static void si_init_cg(struct radeon_device *rdev)
5641 {
5642         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5643                             RADEON_CG_BLOCK_MC |
5644                             RADEON_CG_BLOCK_SDMA |
5645                             RADEON_CG_BLOCK_BIF |
5646                             RADEON_CG_BLOCK_HDP), true);
5647         if (rdev->has_uvd) {
5648                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5649                 si_init_uvd_internal_cg(rdev);
5650         }
5651 }
5652
5653 static void si_fini_cg(struct radeon_device *rdev)
5654 {
5655         if (rdev->has_uvd) {
5656                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5657         }
5658         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5659                             RADEON_CG_BLOCK_MC |
5660                             RADEON_CG_BLOCK_SDMA |
5661                             RADEON_CG_BLOCK_BIF |
5662                             RADEON_CG_BLOCK_HDP), false);
5663 }
5664
5665 u32 si_get_csb_size(struct radeon_device *rdev)
5666 {
5667         u32 count = 0;
5668         const struct cs_section_def *sect = NULL;
5669         const struct cs_extent_def *ext = NULL;
5670
5671         if (rdev->rlc.cs_data == NULL)
5672                 return 0;
5673
5674         /* begin clear state */
5675         count += 2;
5676         /* context control state */
5677         count += 3;
5678
5679         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5680                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5681                         if (sect->id == SECT_CONTEXT)
5682                                 count += 2 + ext->reg_count;
5683                         else
5684                                 return 0;
5685                 }
5686         }
5687         /* pa_sc_raster_config */
5688         count += 3;
5689         /* end clear state */
5690         count += 2;
5691         /* clear state */
5692         count += 2;
5693
5694         return count;
5695 }
5696
5697 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5698 {
5699         u32 count = 0, i;
5700         const struct cs_section_def *sect = NULL;
5701         const struct cs_extent_def *ext = NULL;
5702
5703         if (rdev->rlc.cs_data == NULL)
5704                 return;
5705         if (buffer == NULL)
5706                 return;
5707
5708         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5709         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5710
5711         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5712         buffer[count++] = cpu_to_le32(0x80000000);
5713         buffer[count++] = cpu_to_le32(0x80000000);
5714
5715         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5716                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5717                         if (sect->id == SECT_CONTEXT) {
5718                                 buffer[count++] =
5719                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5720                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5721                                 for (i = 0; i < ext->reg_count; i++)
5722                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
5723                         } else {
5724                                 return;
5725                         }
5726                 }
5727         }
5728
5729         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5730         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5731         switch (rdev->family) {
5732         case CHIP_TAHITI:
5733         case CHIP_PITCAIRN:
5734                 buffer[count++] = cpu_to_le32(0x2a00126a);
5735                 break;
5736         case CHIP_VERDE:
5737                 buffer[count++] = cpu_to_le32(0x0000124a);
5738                 break;
5739         case CHIP_OLAND:
5740                 buffer[count++] = cpu_to_le32(0x00000082);
5741                 break;
5742         case CHIP_HAINAN:
5743                 buffer[count++] = cpu_to_le32(0x00000000);
5744                 break;
5745         default:
5746                 buffer[count++] = cpu_to_le32(0x00000000);
5747                 break;
5748         }
5749
5750         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5751         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5752
5753         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5754         buffer[count++] = cpu_to_le32(0);
5755 }
5756
5757 static void si_init_pg(struct radeon_device *rdev)
5758 {
5759         if (rdev->pg_flags) {
5760                 if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5761                         si_init_dma_pg(rdev);
5762                 }
5763                 si_init_ao_cu_mask(rdev);
5764                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5765                         si_init_gfx_cgpg(rdev);
5766                 } else {
5767                         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5768                         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5769                 }
5770                 si_enable_dma_pg(rdev, true);
5771                 si_enable_gfx_cgpg(rdev, true);
5772         } else {
5773                 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5774                 WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5775         }
5776 }
5777
5778 static void si_fini_pg(struct radeon_device *rdev)
5779 {
5780         if (rdev->pg_flags) {
5781                 si_enable_dma_pg(rdev, false);
5782                 si_enable_gfx_cgpg(rdev, false);
5783         }
5784 }
5785
5786 /*
5787  * RLC
5788  */
5789 void si_rlc_reset(struct radeon_device *rdev)
5790 {
5791         u32 tmp = RREG32(GRBM_SOFT_RESET);
5792
5793         tmp |= SOFT_RESET_RLC;
5794         WREG32(GRBM_SOFT_RESET, tmp);
5795         udelay(50);
5796         tmp &= ~SOFT_RESET_RLC;
5797         WREG32(GRBM_SOFT_RESET, tmp);
5798         udelay(50);
5799 }
5800
5801 static void si_rlc_stop(struct radeon_device *rdev)
5802 {
5803         WREG32(RLC_CNTL, 0);
5804
5805         si_enable_gui_idle_interrupt(rdev, false);
5806
5807         si_wait_for_rlc_serdes(rdev);
5808 }
5809
5810 static void si_rlc_start(struct radeon_device *rdev)
5811 {
5812         WREG32(RLC_CNTL, RLC_ENABLE);
5813
5814         si_enable_gui_idle_interrupt(rdev, true);
5815
5816         udelay(50);
5817 }
5818
5819 static bool si_lbpw_supported(struct radeon_device *rdev)
5820 {
5821         u32 tmp;
5822
5823         /* Enable LBPW only for DDR3 */
5824         tmp = RREG32(MC_SEQ_MISC0);
5825         if ((tmp & 0xF0000000) == 0xB0000000)
5826                 return true;
5827         return false;
5828 }
5829
5830 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5831 {
5832         u32 tmp;
5833
5834         tmp = RREG32(RLC_LB_CNTL);
5835         if (enable)
5836                 tmp |= LOAD_BALANCE_ENABLE;
5837         else
5838                 tmp &= ~LOAD_BALANCE_ENABLE;
5839         WREG32(RLC_LB_CNTL, tmp);
5840
5841         if (!enable) {
5842                 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5843                 WREG32(SPI_LB_CU_MASK, 0x00ff);
5844         }
5845 }
5846
5847 static int si_rlc_resume(struct radeon_device *rdev)
5848 {
5849         u32 i;
5850
5851         if (!rdev->rlc_fw)
5852                 return -EINVAL;
5853
5854         si_rlc_stop(rdev);
5855
5856         si_rlc_reset(rdev);
5857
5858         si_init_pg(rdev);
5859
5860         si_init_cg(rdev);
5861
5862         WREG32(RLC_RL_BASE, 0);
5863         WREG32(RLC_RL_SIZE, 0);
5864         WREG32(RLC_LB_CNTL, 0);
5865         WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5866         WREG32(RLC_LB_CNTR_INIT, 0);
5867         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5868
5869         WREG32(RLC_MC_CNTL, 0);
5870         WREG32(RLC_UCODE_CNTL, 0);
5871
5872         if (rdev->new_fw) {
5873                 const struct rlc_firmware_header_v1_0 *hdr =
5874                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5875                 u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5876                 const __le32 *fw_data = (const __le32 *)
5877                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5878
5879                 radeon_ucode_print_rlc_hdr(&hdr->header);
5880
5881                 for (i = 0; i < fw_size; i++) {
5882                         WREG32(RLC_UCODE_ADDR, i);
5883                         WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
5884                 }
5885         } else {
5886                 const __be32 *fw_data =
5887                         (const __be32 *)rdev->rlc_fw->data;
5888                 for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5889                         WREG32(RLC_UCODE_ADDR, i);
5890                         WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5891                 }
5892         }
5893         WREG32(RLC_UCODE_ADDR, 0);
5894
5895         si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5896
5897         si_rlc_start(rdev);
5898
5899         return 0;
5900 }
5901
5902 static void si_enable_interrupts(struct radeon_device *rdev)
5903 {
5904         u32 ih_cntl = RREG32(IH_CNTL);
5905         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5906
5907         ih_cntl |= ENABLE_INTR;
5908         ih_rb_cntl |= IH_RB_ENABLE;
5909         WREG32(IH_CNTL, ih_cntl);
5910         WREG32(IH_RB_CNTL, ih_rb_cntl);
5911         rdev->ih.enabled = true;
5912 }
5913
5914 static void si_disable_interrupts(struct radeon_device *rdev)
5915 {
5916         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5917         u32 ih_cntl = RREG32(IH_CNTL);
5918
5919         ih_rb_cntl &= ~IH_RB_ENABLE;
5920         ih_cntl &= ~ENABLE_INTR;
5921         WREG32(IH_RB_CNTL, ih_rb_cntl);
5922         WREG32(IH_CNTL, ih_cntl);
5923         /* set rptr, wptr to 0 */
5924         WREG32(IH_RB_RPTR, 0);
5925         WREG32(IH_RB_WPTR, 0);
5926         rdev->ih.enabled = false;
5927         rdev->ih.rptr = 0;
5928 }
5929
5930 static void si_disable_interrupt_state(struct radeon_device *rdev)
5931 {
5932         u32 tmp;
5933
5934         tmp = RREG32(CP_INT_CNTL_RING0) &
5935                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5936         WREG32(CP_INT_CNTL_RING0, tmp);
5937         WREG32(CP_INT_CNTL_RING1, 0);
5938         WREG32(CP_INT_CNTL_RING2, 0);
5939         tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5940         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5941         tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5942         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5943         WREG32(GRBM_INT_CNTL, 0);
5944         WREG32(SRBM_INT_CNTL, 0);
5945         if (rdev->num_crtc >= 2) {
5946                 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5947                 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5948         }
5949         if (rdev->num_crtc >= 4) {
5950                 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5951                 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5952         }
5953         if (rdev->num_crtc >= 6) {
5954                 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5955                 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5956         }
5957
5958         if (rdev->num_crtc >= 2) {
5959                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5960                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5961         }
5962         if (rdev->num_crtc >= 4) {
5963                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5964                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5965         }
5966         if (rdev->num_crtc >= 6) {
5967                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5968                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5969         }
5970
5971         if (!ASIC_IS_NODCE(rdev)) {
5972                 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5973
5974                 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5975                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5976                 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5977                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5978                 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5979                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5980                 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5981                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5982                 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5983                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5984                 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5985                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5986         }
5987 }
5988
5989 static int si_irq_init(struct radeon_device *rdev)
5990 {
5991         int ret = 0;
5992         int rb_bufsz;
5993         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5994
5995         /* allocate ring */
5996         ret = r600_ih_ring_alloc(rdev);
5997         if (ret)
5998                 return ret;
5999
6000         /* disable irqs */
6001         si_disable_interrupts(rdev);
6002
6003         /* init rlc */
6004         ret = si_rlc_resume(rdev);
6005         if (ret) {
6006                 r600_ih_ring_fini(rdev);
6007                 return ret;
6008         }
6009
6010         /* setup interrupt control */
6011         /* set dummy read address to ring address */
6012         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6013         interrupt_cntl = RREG32(INTERRUPT_CNTL);
6014         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6015          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6016          */
6017         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6018         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6019         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6020         WREG32(INTERRUPT_CNTL, interrupt_cntl);
6021
6022         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6023         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6024
6025         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6026                       IH_WPTR_OVERFLOW_CLEAR |
6027                       (rb_bufsz << 1));
6028
6029         if (rdev->wb.enabled)
6030                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6031
6032         /* set the writeback address whether it's enabled or not */
6033         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6034         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6035
6036         WREG32(IH_RB_CNTL, ih_rb_cntl);
6037
6038         /* set rptr, wptr to 0 */
6039         WREG32(IH_RB_RPTR, 0);
6040         WREG32(IH_RB_WPTR, 0);
6041
6042         /* Default settings for IH_CNTL (disabled at first) */
6043         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6044         /* RPTR_REARM only works if msi's are enabled */
6045         if (rdev->msi_enabled)
6046                 ih_cntl |= RPTR_REARM;
6047         WREG32(IH_CNTL, ih_cntl);
6048
6049         /* force the active interrupt state to all disabled */
6050         si_disable_interrupt_state(rdev);
6051
6052         pci_set_master(rdev->pdev);
6053
6054         /* enable irqs */
6055         si_enable_interrupts(rdev);
6056
6057         return ret;
6058 }
6059
6060 int si_irq_set(struct radeon_device *rdev)
6061 {
6062         u32 cp_int_cntl;
6063         u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
6064         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6065         u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
6066         u32 grbm_int_cntl = 0;
6067         u32 dma_cntl, dma_cntl1;
6068         u32 thermal_int = 0;
6069
6070         if (!rdev->irq.installed) {
6071                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6072                 return -EINVAL;
6073         }
6074         /* don't enable anything if the ih is disabled */
6075         if (!rdev->ih.enabled) {
6076                 si_disable_interrupts(rdev);
6077                 /* force the active interrupt state to all disabled */
6078                 si_disable_interrupt_state(rdev);
6079                 return 0;
6080         }
6081
6082         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6083                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6084
6085         if (!ASIC_IS_NODCE(rdev)) {
6086                 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6087                 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6088                 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6089                 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6090                 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6091                 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6092         }
6093
6094         dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6095         dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6096
6097         thermal_int = RREG32(CG_THERMAL_INT) &
6098                 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6099
6100         /* enable CP interrupts on all rings */
6101         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6102                 DRM_DEBUG("si_irq_set: sw int gfx\n");
6103                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6104         }
6105         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6106                 DRM_DEBUG("si_irq_set: sw int cp1\n");
6107                 cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
6108         }
6109         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6110                 DRM_DEBUG("si_irq_set: sw int cp2\n");
6111                 cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
6112         }
6113         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6114                 DRM_DEBUG("si_irq_set: sw int dma\n");
6115                 dma_cntl |= TRAP_ENABLE;
6116         }
6117
6118         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6119                 DRM_DEBUG("si_irq_set: sw int dma1\n");
6120                 dma_cntl1 |= TRAP_ENABLE;
6121         }
6122         if (rdev->irq.crtc_vblank_int[0] ||
6123             atomic_read(&rdev->irq.pflip[0])) {
6124                 DRM_DEBUG("si_irq_set: vblank 0\n");
6125                 crtc1 |= VBLANK_INT_MASK;
6126         }
6127         if (rdev->irq.crtc_vblank_int[1] ||
6128             atomic_read(&rdev->irq.pflip[1])) {
6129                 DRM_DEBUG("si_irq_set: vblank 1\n");
6130                 crtc2 |= VBLANK_INT_MASK;
6131         }
6132         if (rdev->irq.crtc_vblank_int[2] ||
6133             atomic_read(&rdev->irq.pflip[2])) {
6134                 DRM_DEBUG("si_irq_set: vblank 2\n");
6135                 crtc3 |= VBLANK_INT_MASK;
6136         }
6137         if (rdev->irq.crtc_vblank_int[3] ||
6138             atomic_read(&rdev->irq.pflip[3])) {
6139                 DRM_DEBUG("si_irq_set: vblank 3\n");
6140                 crtc4 |= VBLANK_INT_MASK;
6141         }
6142         if (rdev->irq.crtc_vblank_int[4] ||
6143             atomic_read(&rdev->irq.pflip[4])) {
6144                 DRM_DEBUG("si_irq_set: vblank 4\n");
6145                 crtc5 |= VBLANK_INT_MASK;
6146         }
6147         if (rdev->irq.crtc_vblank_int[5] ||
6148             atomic_read(&rdev->irq.pflip[5])) {
6149                 DRM_DEBUG("si_irq_set: vblank 5\n");
6150                 crtc6 |= VBLANK_INT_MASK;
6151         }
6152         if (rdev->irq.hpd[0]) {
6153                 DRM_DEBUG("si_irq_set: hpd 1\n");
6154                 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6155         }
6156         if (rdev->irq.hpd[1]) {
6157                 DRM_DEBUG("si_irq_set: hpd 2\n");
6158                 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6159         }
6160         if (rdev->irq.hpd[2]) {
6161                 DRM_DEBUG("si_irq_set: hpd 3\n");
6162                 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6163         }
6164         if (rdev->irq.hpd[3]) {
6165                 DRM_DEBUG("si_irq_set: hpd 4\n");
6166                 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6167         }
6168         if (rdev->irq.hpd[4]) {
6169                 DRM_DEBUG("si_irq_set: hpd 5\n");
6170                 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6171         }
6172         if (rdev->irq.hpd[5]) {
6173                 DRM_DEBUG("si_irq_set: hpd 6\n");
6174                 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6175         }
6176
6177         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6178         WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
6179         WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
6180
6181         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
6182         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
6183
6184         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6185
6186         if (rdev->irq.dpm_thermal) {
6187                 DRM_DEBUG("dpm thermal\n");
6188                 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6189         }
6190
6191         if (rdev->num_crtc >= 2) {
6192                 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6193                 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6194         }
6195         if (rdev->num_crtc >= 4) {
6196                 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6197                 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6198         }
6199         if (rdev->num_crtc >= 6) {
6200                 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6201                 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6202         }
6203
6204         if (rdev->num_crtc >= 2) {
6205                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
6206                        GRPH_PFLIP_INT_MASK);
6207                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
6208                        GRPH_PFLIP_INT_MASK);
6209         }
6210         if (rdev->num_crtc >= 4) {
6211                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
6212                        GRPH_PFLIP_INT_MASK);
6213                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
6214                        GRPH_PFLIP_INT_MASK);
6215         }
6216         if (rdev->num_crtc >= 6) {
6217                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
6218                        GRPH_PFLIP_INT_MASK);
6219                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
6220                        GRPH_PFLIP_INT_MASK);
6221         }
6222
6223         if (!ASIC_IS_NODCE(rdev)) {
6224                 WREG32(DC_HPD1_INT_CONTROL, hpd1);
6225                 WREG32(DC_HPD2_INT_CONTROL, hpd2);
6226                 WREG32(DC_HPD3_INT_CONTROL, hpd3);
6227                 WREG32(DC_HPD4_INT_CONTROL, hpd4);
6228                 WREG32(DC_HPD5_INT_CONTROL, hpd5);
6229                 WREG32(DC_HPD6_INT_CONTROL, hpd6);
6230         }
6231
6232         WREG32(CG_THERMAL_INT, thermal_int);
6233
6234         /* posting read */
6235         RREG32(SRBM_STATUS);
6236
6237         return 0;
6238 }
6239
6240 static inline void si_irq_ack(struct radeon_device *rdev)
6241 {
6242         u32 tmp;
6243
6244         if (ASIC_IS_NODCE(rdev))
6245                 return;
6246
6247         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6248         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6249         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6250         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6251         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6252         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6253         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
6254         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
6255         if (rdev->num_crtc >= 4) {
6256                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
6257                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
6258         }
6259         if (rdev->num_crtc >= 6) {
6260                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
6261                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
6262         }
6263
6264         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
6265                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6266         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
6267                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6268         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
6269                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6270         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
6271                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6272         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6273                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6274         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6275                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6276
6277         if (rdev->num_crtc >= 4) {
6278                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
6279                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6280                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
6281                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6282                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6283                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6284                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6285                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6286                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6287                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6288                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6289                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6290         }
6291
6292         if (rdev->num_crtc >= 6) {
6293                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6294                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6295                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6296                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6297                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6298                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6299                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6300                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6301                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6302                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6303                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6304                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6305         }
6306
6307         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6308                 tmp = RREG32(DC_HPD1_INT_CONTROL);
6309                 tmp |= DC_HPDx_INT_ACK;
6310                 WREG32(DC_HPD1_INT_CONTROL, tmp);
6311         }
6312         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6313                 tmp = RREG32(DC_HPD2_INT_CONTROL);
6314                 tmp |= DC_HPDx_INT_ACK;
6315                 WREG32(DC_HPD2_INT_CONTROL, tmp);
6316         }
6317         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6318                 tmp = RREG32(DC_HPD3_INT_CONTROL);
6319                 tmp |= DC_HPDx_INT_ACK;
6320                 WREG32(DC_HPD3_INT_CONTROL, tmp);
6321         }
6322         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6323                 tmp = RREG32(DC_HPD4_INT_CONTROL);
6324                 tmp |= DC_HPDx_INT_ACK;
6325                 WREG32(DC_HPD4_INT_CONTROL, tmp);
6326         }
6327         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6328                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6329                 tmp |= DC_HPDx_INT_ACK;
6330                 WREG32(DC_HPD5_INT_CONTROL, tmp);
6331         }
6332         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6333                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6334                 tmp |= DC_HPDx_INT_ACK;
6335                 WREG32(DC_HPD6_INT_CONTROL, tmp);
6336         }
6337
6338         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) {
6339                 tmp = RREG32(DC_HPD1_INT_CONTROL);
6340                 tmp |= DC_HPDx_RX_INT_ACK;
6341                 WREG32(DC_HPD1_INT_CONTROL, tmp);
6342         }
6343         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
6344                 tmp = RREG32(DC_HPD2_INT_CONTROL);
6345                 tmp |= DC_HPDx_RX_INT_ACK;
6346                 WREG32(DC_HPD2_INT_CONTROL, tmp);
6347         }
6348         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
6349                 tmp = RREG32(DC_HPD3_INT_CONTROL);
6350                 tmp |= DC_HPDx_RX_INT_ACK;
6351                 WREG32(DC_HPD3_INT_CONTROL, tmp);
6352         }
6353         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
6354                 tmp = RREG32(DC_HPD4_INT_CONTROL);
6355                 tmp |= DC_HPDx_RX_INT_ACK;
6356                 WREG32(DC_HPD4_INT_CONTROL, tmp);
6357         }
6358         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
6359                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6360                 tmp |= DC_HPDx_RX_INT_ACK;
6361                 WREG32(DC_HPD5_INT_CONTROL, tmp);
6362         }
6363         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
6364                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6365                 tmp |= DC_HPDx_RX_INT_ACK;
6366                 WREG32(DC_HPD6_INT_CONTROL, tmp);
6367         }
6368 }
6369
/**
 * si_irq_disable - turn interrupts off and flush what is still pending
 * @rdev: radeon device
 *
 * Disables interrupts, waits one millisecond, acknowledges the display
 * interrupts that are still pending and then forces every interrupt
 * source to the disabled state.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);

	/* give in-flight interrupts time to land before acknowledging them */
	mdelay(1);
	si_irq_ack(rdev);

	si_disable_interrupt_state(rdev);
}
6378
/**
 * si_irq_suspend - quiesce interrupt handling
 * @rdev: radeon device
 *
 * Disables interrupts via si_irq_disable() and then stops the RLC.
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	/* interrupts go down first, the RLC second */
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
6384
/**
 * si_irq_fini - tear down interrupt handling
 * @rdev: radeon device
 *
 * Suspends interrupt handling via si_irq_suspend() and then releases the
 * IH ring.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	/* the ring is only released once interrupts have been quiesced */
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6390
6391 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6392 {
6393         u32 wptr, tmp;
6394
6395         if (rdev->wb.enabled)
6396                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6397         else
6398                 wptr = RREG32(IH_RB_WPTR);
6399
6400         if (wptr & RB_OVERFLOW) {
6401                 wptr &= ~RB_OVERFLOW;
6402                 /* When a ring buffer overflow happen start parsing interrupt
6403                  * from the last not overwritten vector (wptr + 16). Hopefully
6404                  * this should allow us to catchup.
6405                  */
6406                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6407                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6408                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6409                 tmp = RREG32(IH_RB_CNTL);
6410                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6411                 WREG32(IH_RB_CNTL, tmp);
6412         }
6413         return (wptr & rdev->ih.ptr_mask);
6414 }
6415
6416 /*        SI IV Ring
6417  * Each IV ring entry is 128 bits:
6418  * [7:0]    - interrupt source id
6419  * [31:8]   - reserved
6420  * [59:32]  - interrupt source data
6421  * [63:60]  - reserved
6422  * [71:64]  - RINGID
6423  * [79:72]  - VMID
6424  * [127:80] - reserved
6425  */
6426 int si_irq_process(struct radeon_device *rdev)
6427 {
6428         u32 wptr;
6429         u32 rptr;
6430         u32 src_id, src_data, ring_id;
6431         u32 ring_index;
6432         bool queue_hotplug = false;
6433         bool queue_dp = false;
6434         bool queue_thermal = false;
6435         u32 status, addr;
6436
6437         if (!rdev->ih.enabled || rdev->shutdown)
6438                 return IRQ_NONE;
6439
6440         wptr = si_get_ih_wptr(rdev);
6441
6442 restart_ih:
6443         /* is somebody else already processing irqs? */
6444         if (atomic_xchg(&rdev->ih.lock, 1))
6445                 return IRQ_NONE;
6446
6447         rptr = rdev->ih.rptr;
6448         DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6449
6450         /* Order reading of wptr vs. reading of IH ring data */
6451         rmb();
6452
6453         /* display interrupts */
6454         si_irq_ack(rdev);
6455
6456         while (rptr != wptr) {
6457                 /* wptr/rptr are in bytes! */
6458                 ring_index = rptr / 4;
6459                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6460                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6461                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6462
6463                 switch (src_id) {
6464                 case 1: /* D1 vblank/vline */
6465                         switch (src_data) {
6466                         case 0: /* D1 vblank */
6467                                 if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT))
6468                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6469
6470                                 if (rdev->irq.crtc_vblank_int[0]) {
6471                                         drm_handle_vblank(rdev->ddev, 0);
6472                                         rdev->pm.vblank_sync = true;
6473                                         wake_up(&rdev->irq.vblank_queue);
6474                                 }
6475                                 if (atomic_read(&rdev->irq.pflip[0]))
6476                                         radeon_crtc_handle_vblank(rdev, 0);
6477                                 rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6478                                 DRM_DEBUG("IH: D1 vblank\n");
6479
6480                                 break;
6481                         case 1: /* D1 vline */
6482                                 if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT))
6483                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6484
6485                                 rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6486                                 DRM_DEBUG("IH: D1 vline\n");
6487
6488                                 break;
6489                         default:
6490                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6491                                 break;
6492                         }
6493                         break;
6494                 case 2: /* D2 vblank/vline */
6495                         switch (src_data) {
6496                         case 0: /* D2 vblank */
6497                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
6498                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6499
6500                                 if (rdev->irq.crtc_vblank_int[1]) {
6501                                         drm_handle_vblank(rdev->ddev, 1);
6502                                         rdev->pm.vblank_sync = true;
6503                                         wake_up(&rdev->irq.vblank_queue);
6504                                 }
6505                                 if (atomic_read(&rdev->irq.pflip[1]))
6506                                         radeon_crtc_handle_vblank(rdev, 1);
6507                                 rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6508                                 DRM_DEBUG("IH: D2 vblank\n");
6509
6510                                 break;
6511                         case 1: /* D2 vline */
6512                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT))
6513                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6514
6515                                 rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6516                                 DRM_DEBUG("IH: D2 vline\n");
6517
6518                                 break;
6519                         default:
6520                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6521                                 break;
6522                         }
6523                         break;
6524                 case 3: /* D3 vblank/vline */
6525                         switch (src_data) {
6526                         case 0: /* D3 vblank */
6527                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
6528                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6529
6530                                 if (rdev->irq.crtc_vblank_int[2]) {
6531                                         drm_handle_vblank(rdev->ddev, 2);
6532                                         rdev->pm.vblank_sync = true;
6533                                         wake_up(&rdev->irq.vblank_queue);
6534                                 }
6535                                 if (atomic_read(&rdev->irq.pflip[2]))
6536                                         radeon_crtc_handle_vblank(rdev, 2);
6537                                 rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6538                                 DRM_DEBUG("IH: D3 vblank\n");
6539
6540                                 break;
6541                         case 1: /* D3 vline */
6542                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
6543                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6544
6545                                 rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6546                                 DRM_DEBUG("IH: D3 vline\n");
6547
6548                                 break;
6549                         default:
6550                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6551                                 break;
6552                         }
6553                         break;
6554                 case 4: /* D4 vblank/vline */
6555                         switch (src_data) {
6556                         case 0: /* D4 vblank */
6557                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
6558                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6559
6560                                 if (rdev->irq.crtc_vblank_int[3]) {
6561                                         drm_handle_vblank(rdev->ddev, 3);
6562                                         rdev->pm.vblank_sync = true;
6563                                         wake_up(&rdev->irq.vblank_queue);
6564                                 }
6565                                 if (atomic_read(&rdev->irq.pflip[3]))
6566                                         radeon_crtc_handle_vblank(rdev, 3);
6567                                 rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6568                                 DRM_DEBUG("IH: D4 vblank\n");
6569
6570                                 break;
6571                         case 1: /* D4 vline */
6572                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
6573                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6574
6575                                 rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6576                                 DRM_DEBUG("IH: D4 vline\n");
6577
6578                                 break;
6579                         default:
6580                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6581                                 break;
6582                         }
6583                         break;
6584                 case 5: /* D5 vblank/vline */
6585                         switch (src_data) {
6586                         case 0: /* D5 vblank */
6587                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
6588                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6589
6590                                 if (rdev->irq.crtc_vblank_int[4]) {
6591                                         drm_handle_vblank(rdev->ddev, 4);
6592                                         rdev->pm.vblank_sync = true;
6593                                         wake_up(&rdev->irq.vblank_queue);
6594                                 }
6595                                 if (atomic_read(&rdev->irq.pflip[4]))
6596                                         radeon_crtc_handle_vblank(rdev, 4);
6597                                 rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6598                                 DRM_DEBUG("IH: D5 vblank\n");
6599
6600                                 break;
6601                         case 1: /* D5 vline */
6602                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
6603                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6604
6605                                 rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6606                                 DRM_DEBUG("IH: D5 vline\n");
6607
6608                                 break;
6609                         default:
6610                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6611                                 break;
6612                         }
6613                         break;
6614                 case 6: /* D6 vblank/vline */
6615                         switch (src_data) {
6616                         case 0: /* D6 vblank */
6617                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
6618                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6619
6620                                 if (rdev->irq.crtc_vblank_int[5]) {
6621                                         drm_handle_vblank(rdev->ddev, 5);
6622                                         rdev->pm.vblank_sync = true;
6623                                         wake_up(&rdev->irq.vblank_queue);
6624                                 }
6625                                 if (atomic_read(&rdev->irq.pflip[5]))
6626                                         radeon_crtc_handle_vblank(rdev, 5);
6627                                 rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6628                                 DRM_DEBUG("IH: D6 vblank\n");
6629
6630                                 break;
6631                         case 1: /* D6 vline */
6632                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
6633                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6634
6635                                 rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6636                                 DRM_DEBUG("IH: D6 vline\n");
6637
6638                                 break;
6639                         default:
6640                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6641                                 break;
6642                         }
6643                         break;
6644                 case 8: /* D1 page flip */
6645                 case 10: /* D2 page flip */
6646                 case 12: /* D3 page flip */
6647                 case 14: /* D4 page flip */
6648                 case 16: /* D5 page flip */
6649                 case 18: /* D6 page flip */
6650                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6651                         if (radeon_use_pflipirq > 0)
6652                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6653                         break;
6654                 case 42: /* HPD hotplug */
6655                         switch (src_data) {
6656                         case 0:
6657                                 if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT))
6658                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6659
6660                                 rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6661                                 queue_hotplug = true;
6662                                 DRM_DEBUG("IH: HPD1\n");
6663
6664                                 break;
6665                         case 1:
6666                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT))
6667                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6668
6669                                 rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6670                                 queue_hotplug = true;
6671                                 DRM_DEBUG("IH: HPD2\n");
6672
6673                                 break;
6674                         case 2:
6675                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT))
6676                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6677
6678                                 rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6679                                 queue_hotplug = true;
6680                                 DRM_DEBUG("IH: HPD3\n");
6681
6682                                 break;
6683                         case 3:
6684                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT))
6685                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6686
6687                                 rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6688                                 queue_hotplug = true;
6689                                 DRM_DEBUG("IH: HPD4\n");
6690
6691                                 break;
6692                         case 4:
6693                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT))
6694                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6695
6696                                 rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6697                                 queue_hotplug = true;
6698                                 DRM_DEBUG("IH: HPD5\n");
6699
6700                                 break;
6701                         case 5:
6702                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT))
6703                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6704
6705                                 rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6706                                 queue_hotplug = true;
6707                                 DRM_DEBUG("IH: HPD6\n");
6708
6709                                 break;
6710                         case 6:
6711                                 if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT))
6712                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6713
6714                                 rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT;
6715                                 queue_dp = true;
6716                                 DRM_DEBUG("IH: HPD_RX 1\n");
6717
6718                                 break;
6719                         case 7:
6720                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT))
6721                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6722
6723                                 rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
6724                                 queue_dp = true;
6725                                 DRM_DEBUG("IH: HPD_RX 2\n");
6726
6727                                 break;
6728                         case 8:
6729                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
6730                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6731
6732                                 rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
6733                                 queue_dp = true;
6734                                 DRM_DEBUG("IH: HPD_RX 3\n");
6735
6736                                 break;
6737                         case 9:
6738                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
6739                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6740
6741                                 rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
6742                                 queue_dp = true;
6743                                 DRM_DEBUG("IH: HPD_RX 4\n");
6744
6745                                 break;
6746                         case 10:
6747                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
6748                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6749
6750                                 rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
6751                                 queue_dp = true;
6752                                 DRM_DEBUG("IH: HPD_RX 5\n");
6753
6754                                 break;
6755                         case 11:
6756                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
6757                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6758
6759                                 rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
6760                                 queue_dp = true;
6761                                 DRM_DEBUG("IH: HPD_RX 6\n");
6762
6763                                 break;
6764                         default:
6765                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6766                                 break;
6767                         }
6768                         break;
6769                 case 96:
6770                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
6771                         WREG32(SRBM_INT_ACK, 0x1);
6772                         break;
6773                 case 124: /* UVD */
6774                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6775                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6776                         break;
6777                 case 146:
6778                 case 147:
6779                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6780                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6781                         /* reset addr and status */
6782                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6783                         if (addr == 0x0 && status == 0x0)
6784                                 break;
6785                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6786                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6787                                 addr);
6788                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6789                                 status);
6790                         si_vm_decode_fault(rdev, status, addr);
6791                         break;
6792                 case 176: /* RINGID0 CP_INT */
6793                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6794                         break;
6795                 case 177: /* RINGID1 CP_INT */
6796                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6797                         break;
6798                 case 178: /* RINGID2 CP_INT */
6799                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6800                         break;
6801                 case 181: /* CP EOP event */
6802                         DRM_DEBUG("IH: CP EOP\n");
6803                         switch (ring_id) {
6804                         case 0:
6805                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6806                                 break;
6807                         case 1:
6808                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6809                                 break;
6810                         case 2:
6811                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6812                                 break;
6813                         }
6814                         break;
6815                 case 224: /* DMA trap event */
6816                         DRM_DEBUG("IH: DMA trap\n");
6817                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6818                         break;
6819                 case 230: /* thermal low to high */
6820                         DRM_DEBUG("IH: thermal low to high\n");
6821                         rdev->pm.dpm.thermal.high_to_low = false;
6822                         queue_thermal = true;
6823                         break;
6824                 case 231: /* thermal high to low */
6825                         DRM_DEBUG("IH: thermal high to low\n");
6826                         rdev->pm.dpm.thermal.high_to_low = true;
6827                         queue_thermal = true;
6828                         break;
6829                 case 233: /* GUI IDLE */
6830                         DRM_DEBUG("IH: GUI idle\n");
6831                         break;
6832                 case 244: /* DMA trap event */
6833                         DRM_DEBUG("IH: DMA1 trap\n");
6834                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6835                         break;
6836                 default:
6837                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6838                         break;
6839                 }
6840
6841                 /* wptr/rptr are in bytes! */
6842                 rptr += 16;
6843                 rptr &= rdev->ih.ptr_mask;
6844                 WREG32(IH_RB_RPTR, rptr);
6845         }
6846         if (queue_dp)
6847                 schedule_work(&rdev->dp_work);
6848         if (queue_hotplug)
6849                 schedule_delayed_work(&rdev->hotplug_work, 0);
6850         if (queue_thermal && rdev->pm.dpm_enabled)
6851                 schedule_work(&rdev->pm.dpm.thermal.work);
6852         rdev->ih.rptr = rptr;
6853         atomic_set(&rdev->ih.lock, 0);
6854
6855         /* make sure wptr hasn't changed while processing */
6856         wptr = si_get_ih_wptr(rdev);
6857         if (wptr != rptr)
6858                 goto restart_ih;
6859
6860         return IRQ_HANDLED;
6861 }
6862
6863 /*
6864  * startup/shutdown callbacks
6865  */
6866 static void si_uvd_init(struct radeon_device *rdev)
6867 {
6868         int r;
6869
6870         if (!rdev->has_uvd)
6871                 return;
6872
6873         r = radeon_uvd_init(rdev);
6874         if (r) {
6875                 dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
6876                 /*
6877                  * At this point rdev->uvd.vcpu_bo is NULL which trickles down
6878                  * to early fails uvd_v2_2_resume() and thus nothing happens
6879                  * there. So it is pointless to try to go through that code
6880                  * hence why we disable uvd here.
6881                  */
6882                 rdev->has_uvd = 0;
6883                 return;
6884         }
6885         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
6886         r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
6887 }
6888
6889 static void si_uvd_start(struct radeon_device *rdev)
6890 {
6891         int r;
6892
6893         if (!rdev->has_uvd)
6894                 return;
6895
6896         r = uvd_v2_2_resume(rdev);
6897         if (r) {
6898                 dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
6899                 goto error;
6900         }
6901         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
6902         if (r) {
6903                 dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
6904                 goto error;
6905         }
6906         return;
6907
6908 error:
6909         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6910 }
6911
6912 static void si_uvd_resume(struct radeon_device *rdev)
6913 {
6914         struct radeon_ring *ring;
6915         int r;
6916
6917         if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
6918                 return;
6919
6920         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6921         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
6922         if (r) {
6923                 dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
6924                 return;
6925         }
6926         r = uvd_v1_0_init(rdev);
6927         if (r) {
6928                 dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
6929                 return;
6930         }
6931 }
6932
6933 static void si_vce_init(struct radeon_device *rdev)
6934 {
6935         int r;
6936
6937         if (!rdev->has_vce)
6938                 return;
6939
6940         r = radeon_vce_init(rdev);
6941         if (r) {
6942                 dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
6943                 /*
6944                  * At this point rdev->vce.vcpu_bo is NULL which trickles down
6945                  * to early fails si_vce_start() and thus nothing happens
6946                  * there. So it is pointless to try to go through that code
6947                  * hence why we disable vce here.
6948                  */
6949                 rdev->has_vce = 0;
6950                 return;
6951         }
6952         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
6953         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
6954         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
6955         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
6956 }
6957
6958 static void si_vce_start(struct radeon_device *rdev)
6959 {
6960         int r;
6961
6962         if (!rdev->has_vce)
6963                 return;
6964
6965         r = radeon_vce_resume(rdev);
6966         if (r) {
6967                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6968                 goto error;
6969         }
6970         r = vce_v1_0_resume(rdev);
6971         if (r) {
6972                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6973                 goto error;
6974         }
6975         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
6976         if (r) {
6977                 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
6978                 goto error;
6979         }
6980         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
6981         if (r) {
6982                 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
6983                 goto error;
6984         }
6985         return;
6986
6987 error:
6988         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
6989         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
6990 }
6991
6992 static void si_vce_resume(struct radeon_device *rdev)
6993 {
6994         struct radeon_ring *ring;
6995         int r;
6996
6997         if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
6998                 return;
6999
7000         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
7001         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
7002         if (r) {
7003                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
7004                 return;
7005         }
7006         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
7007         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
7008         if (r) {
7009                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
7010                 return;
7011         }
7012         r = vce_v1_0_init(rdev);
7013         if (r) {
7014                 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
7015                 return;
7016         }
7017 }
7018
7019 static int si_startup(struct radeon_device *rdev)
7020 {
7021         struct radeon_ring *ring;
7022         int r;
7023
7024         /* enable pcie gen2/3 link */
7025         si_pcie_gen3_enable(rdev);
7026         /* enable aspm */
7027         si_program_aspm(rdev);
7028
7029         /* scratch needs to be initialized before MC */
7030         r = r600_vram_scratch_init(rdev);
7031         if (r)
7032                 return r;
7033
7034         si_mc_program(rdev);
7035
7036         if (!rdev->pm.dpm_enabled) {
7037                 r = si_mc_load_microcode(rdev);
7038                 if (r) {
7039                         DRM_ERROR("Failed to load MC firmware!\n");
7040                         return r;
7041                 }
7042         }
7043
7044         r = si_pcie_gart_enable(rdev);
7045         if (r)
7046                 return r;
7047         si_gpu_init(rdev);
7048
7049         /* allocate rlc buffers */
7050         if (rdev->family == CHIP_VERDE) {
7051                 rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
7052                 rdev->rlc.reg_list_size =
7053                         (u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
7054         }
7055         rdev->rlc.cs_data = si_cs_data;
7056         r = sumo_rlc_init(rdev);
7057         if (r) {
7058                 DRM_ERROR("Failed to init rlc BOs!\n");
7059                 return r;
7060         }
7061
7062         /* allocate wb buffer */
7063         r = radeon_wb_init(rdev);
7064         if (r)
7065                 return r;
7066
7067         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7068         if (r) {
7069                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7070                 return r;
7071         }
7072
7073         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7074         if (r) {
7075                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7076                 return r;
7077         }
7078
7079         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7080         if (r) {
7081                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7082                 return r;
7083         }
7084
7085         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7086         if (r) {
7087                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7088                 return r;
7089         }
7090
7091         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7092         if (r) {
7093                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7094                 return r;
7095         }
7096
7097         si_uvd_start(rdev);
7098         si_vce_start(rdev);
7099
7100         /* Enable IRQ */
7101         if (!rdev->irq.installed) {
7102                 r = radeon_irq_kms_init(rdev);
7103                 if (r)
7104                         return r;
7105         }
7106
7107         r = si_irq_init(rdev);
7108         if (r) {
7109                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
7110                 radeon_irq_kms_fini(rdev);
7111                 return r;
7112         }
7113         si_irq_set(rdev);
7114
7115         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7116         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7117                              RADEON_CP_PACKET2);
7118         if (r)
7119                 return r;
7120
7121         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7122         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7123                              RADEON_CP_PACKET2);
7124         if (r)
7125                 return r;
7126
7127         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7128         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7129                              RADEON_CP_PACKET2);
7130         if (r)
7131                 return r;
7132
7133         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7134         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7135                              DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
7136         if (r)
7137                 return r;
7138
7139         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7140         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7141                              DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
7142         if (r)
7143                 return r;
7144
7145         r = si_cp_load_microcode(rdev);
7146         if (r)
7147                 return r;
7148         r = si_cp_resume(rdev);
7149         if (r)
7150                 return r;
7151
7152         r = cayman_dma_resume(rdev);
7153         if (r)
7154                 return r;
7155
7156         si_uvd_resume(rdev);
7157         si_vce_resume(rdev);
7158
7159         r = radeon_ib_pool_init(rdev);
7160         if (r) {
7161                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7162                 return r;
7163         }
7164
7165         r = radeon_vm_manager_init(rdev);
7166         if (r) {
7167                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7168                 return r;
7169         }
7170
7171         r = radeon_audio_init(rdev);
7172         if (r)
7173                 return r;
7174
7175         return 0;
7176 }
7177
7178 int si_resume(struct radeon_device *rdev)
7179 {
7180         int r;
7181
7182         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
7183          * posting will perform necessary task to bring back GPU into good
7184          * shape.
7185          */
7186         /* post card */
7187         atom_asic_init(rdev->mode_info.atom_context);
7188
7189         /* init golden registers */
7190         si_init_golden_registers(rdev);
7191
7192         if (rdev->pm.pm_method == PM_METHOD_DPM)
7193                 radeon_pm_resume(rdev);
7194
7195         rdev->accel_working = true;
7196         r = si_startup(rdev);
7197         if (r) {
7198                 DRM_ERROR("si startup failed on resume\n");
7199                 rdev->accel_working = false;
7200                 return r;
7201         }
7202
7203         return r;
7204
7205 }
7206
7207 int si_suspend(struct radeon_device *rdev)
7208 {
7209         radeon_pm_suspend(rdev);
7210         radeon_audio_fini(rdev);
7211         radeon_vm_manager_fini(rdev);
7212         si_cp_enable(rdev, false);
7213         cayman_dma_stop(rdev);
7214         if (rdev->has_uvd) {
7215                 uvd_v1_0_fini(rdev);
7216                 radeon_uvd_suspend(rdev);
7217         }
7218         if (rdev->has_vce)
7219                 radeon_vce_suspend(rdev);
7220         si_fini_pg(rdev);
7221         si_fini_cg(rdev);
7222         si_irq_suspend(rdev);
7223         radeon_wb_disable(rdev);
7224         si_pcie_gart_disable(rdev);
7225         return 0;
7226 }
7227
7228 /* Plan is to move initialization in that function and use
7229  * helper function so that radeon_device_init pretty much
7230  * do nothing more than calling asic specific function. This
7231  * should also allow to remove a bunch of callback function
7232  * like vram_info.
7233  */
7234 int si_init(struct radeon_device *rdev)
7235 {
7236         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7237         int r;
7238
7239         /* Read BIOS */
7240         if (!radeon_get_bios(rdev)) {
7241                 if (ASIC_IS_AVIVO(rdev))
7242                         return -EINVAL;
7243         }
7244         /* Must be an ATOMBIOS */
7245         if (!rdev->is_atom_bios) {
7246                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
7247                 return -EINVAL;
7248         }
7249         r = radeon_atombios_init(rdev);
7250         if (r)
7251                 return r;
7252
7253         /* Post card if necessary */
7254         if (!radeon_card_posted(rdev)) {
7255                 if (!rdev->bios) {
7256                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7257                         return -EINVAL;
7258                 }
7259                 DRM_INFO("GPU not posted. posting now...\n");
7260                 atom_asic_init(rdev->mode_info.atom_context);
7261         }
7262         /* init golden registers */
7263         si_init_golden_registers(rdev);
7264         /* Initialize scratch registers */
7265         si_scratch_init(rdev);
7266         /* Initialize surface registers */
7267         radeon_surface_init(rdev);
7268         /* Initialize clocks */
7269         radeon_get_clock_info(rdev->ddev);
7270
7271         /* Fence driver */
7272         r = radeon_fence_driver_init(rdev);
7273         if (r)
7274                 return r;
7275
7276         /* initialize memory controller */
7277         r = si_mc_init(rdev);
7278         if (r)
7279                 return r;
7280         /* Memory manager */
7281         r = radeon_bo_init(rdev);
7282         if (r)
7283                 return r;
7284
7285         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7286             !rdev->rlc_fw || !rdev->mc_fw) {
7287                 r = si_init_microcode(rdev);
7288                 if (r) {
7289                         DRM_ERROR("Failed to load firmware!\n");
7290                         return r;
7291                 }
7292         }
7293
7294         /* Initialize power management */
7295         radeon_pm_init(rdev);
7296
7297         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7298         ring->ring_obj = NULL;
7299         r600_ring_init(rdev, ring, 1024 * 1024);
7300
7301         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7302         ring->ring_obj = NULL;
7303         r600_ring_init(rdev, ring, 1024 * 1024);
7304
7305         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7306         ring->ring_obj = NULL;
7307         r600_ring_init(rdev, ring, 1024 * 1024);
7308
7309         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7310         ring->ring_obj = NULL;
7311         r600_ring_init(rdev, ring, 64 * 1024);
7312
7313         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7314         ring->ring_obj = NULL;
7315         r600_ring_init(rdev, ring, 64 * 1024);
7316
7317         si_uvd_init(rdev);
7318         si_vce_init(rdev);
7319
7320         rdev->ih.ring_obj = NULL;
7321         r600_ih_ring_init(rdev, 64 * 1024);
7322
7323         r = r600_pcie_gart_init(rdev);
7324         if (r)
7325                 return r;
7326
7327         rdev->accel_working = true;
7328         r = si_startup(rdev);
7329         if (r) {
7330                 dev_err(rdev->dev, "disabling GPU acceleration\n");
7331                 si_cp_fini(rdev);
7332                 cayman_dma_fini(rdev);
7333                 si_irq_fini(rdev);
7334                 sumo_rlc_fini(rdev);
7335                 radeon_wb_fini(rdev);
7336                 radeon_ib_pool_fini(rdev);
7337                 radeon_vm_manager_fini(rdev);
7338                 radeon_irq_kms_fini(rdev);
7339                 si_pcie_gart_fini(rdev);
7340                 rdev->accel_working = false;
7341         }
7342
7343         /* Don't start up if the MC ucode is missing.
7344          * The default clocks and voltages before the MC ucode
7345          * is loaded are not suffient for advanced operations.
7346          */
7347         if (!rdev->mc_fw) {
7348                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
7349                 return -EINVAL;
7350         }
7351
7352         return 0;
7353 }
7354
7355 void si_fini(struct radeon_device *rdev)
7356 {
7357         radeon_pm_fini(rdev);
7358         si_cp_fini(rdev);
7359         cayman_dma_fini(rdev);
7360         si_fini_pg(rdev);
7361         si_fini_cg(rdev);
7362         si_irq_fini(rdev);
7363         sumo_rlc_fini(rdev);
7364         radeon_wb_fini(rdev);
7365         radeon_vm_manager_fini(rdev);
7366         radeon_ib_pool_fini(rdev);
7367         radeon_irq_kms_fini(rdev);
7368         if (rdev->has_uvd) {
7369                 uvd_v1_0_fini(rdev);
7370                 radeon_uvd_fini(rdev);
7371         }
7372         if (rdev->has_vce)
7373                 radeon_vce_fini(rdev);
7374         si_pcie_gart_fini(rdev);
7375         r600_vram_scratch_fini(rdev);
7376         radeon_gem_fini(rdev);
7377         radeon_fence_driver_fini(rdev);
7378         radeon_bo_fini(rdev);
7379         radeon_atombios_fini(rdev);
7380         kfree(rdev->bios);
7381         rdev->bios = NULL;
7382 }
7383
7384 /**
7385  * si_get_gpu_clock_counter - return GPU clock counter snapshot
7386  *
7387  * @rdev: radeon_device pointer
7388  *
7389  * Fetches a GPU clock counter snapshot (SI).
7390  * Returns the 64 bit clock counter snapshot.
7391  */
7392 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
7393 {
7394         uint64_t clock;
7395
7396         mutex_lock(&rdev->gpu_clock_mutex);
7397         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7398         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7399                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7400         mutex_unlock(&rdev->gpu_clock_mutex);
7401         return clock;
7402 }
7403
7404 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
7405 {
7406         unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
7407         int r;
7408
7409         /* bypass vclk and dclk with bclk */
7410         WREG32_P(CG_UPLL_FUNC_CNTL_2,
7411                 VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
7412                 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7413
7414         /* put PLL in bypass mode */
7415         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
7416
7417         if (!vclk || !dclk) {
7418                 /* keep the Bypass mode */
7419                 return 0;
7420         }
7421
7422         r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
7423                                           16384, 0x03FFFFFF, 0, 128, 5,
7424                                           &fb_div, &vclk_div, &dclk_div);
7425         if (r)
7426                 return r;
7427
7428         /* set RESET_ANTI_MUX to 0 */
7429         WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7430
7431         /* set VCO_MODE to 1 */
7432         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
7433
7434         /* disable sleep mode */
7435         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
7436
7437         /* deassert UPLL_RESET */
7438         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7439
7440         mdelay(1);
7441
7442         r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7443         if (r)
7444                 return r;
7445
7446         /* assert UPLL_RESET again */
7447         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
7448
7449         /* disable spread spectrum. */
7450         WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7451
7452         /* set feedback divider */
7453         WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
7454
7455         /* set ref divider to 0 */
7456         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
7457
7458         if (fb_div < 307200)
7459                 WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
7460         else
7461                 WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
7462
7463         /* set PDIV_A and PDIV_B */
7464         WREG32_P(CG_UPLL_FUNC_CNTL_2,
7465                 UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
7466                 ~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
7467
7468         /* give the PLL some time to settle */
7469         mdelay(15);
7470
7471         /* deassert PLL_RESET */
7472         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7473
7474         mdelay(15);
7475
7476         /* switch from bypass mode to normal mode */
7477         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
7478
7479         r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7480         if (r)
7481                 return r;
7482
7483         /* switch VCLK and DCLK selection */
7484         WREG32_P(CG_UPLL_FUNC_CNTL_2,
7485                 VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
7486                 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7487
7488         mdelay(100);
7489
7490         return 0;
7491 }
7492
7493 static void si_pcie_gen3_enable(struct radeon_device *rdev)
7494 {
7495         struct pci_dev *root = rdev->pdev->bus->self;
7496         int bridge_pos, gpu_pos;
7497         u32 speed_cntl, mask, current_data_rate;
7498         int ret, i;
7499         u16 tmp16;
7500
7501         if (pci_is_root_bus(rdev->pdev->bus))
7502                 return;
7503
7504         if (radeon_pcie_gen2 == 0)
7505                 return;
7506
7507         if (rdev->flags & RADEON_IS_IGP)
7508                 return;
7509
7510         if (!(rdev->flags & RADEON_IS_PCIE))
7511                 return;
7512
7513         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
7514         if (ret != 0)
7515                 return;
7516
7517         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
7518                 return;
7519
7520         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7521         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7522                 LC_CURRENT_DATA_RATE_SHIFT;
7523         if (mask & DRM_PCIE_SPEED_80) {
7524                 if (current_data_rate == 2) {
7525                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7526                         return;
7527                 }
7528                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7529         } else if (mask & DRM_PCIE_SPEED_50) {
7530                 if (current_data_rate == 1) {
7531                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7532                         return;
7533                 }
7534                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7535         }
7536
7537         bridge_pos = pci_pcie_cap(root);
7538         if (!bridge_pos)
7539                 return;
7540
7541         gpu_pos = pci_pcie_cap(rdev->pdev);
7542         if (!gpu_pos)
7543                 return;
7544
7545         if (mask & DRM_PCIE_SPEED_80) {
7546                 /* re-try equalization if gen3 is not already enabled */
7547                 if (current_data_rate != 2) {
7548                         u16 bridge_cfg, gpu_cfg;
7549                         u16 bridge_cfg2, gpu_cfg2;
7550                         u32 max_lw, current_lw, tmp;
7551
7552                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7553                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7554
7555                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7556                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7557
7558                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7559                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7560
7561                         tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7562                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7563                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7564
7565                         if (current_lw < max_lw) {
7566                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7567                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
7568                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7569                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7570                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7571                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7572                                 }
7573                         }
7574
7575                         for (i = 0; i < 10; i++) {
7576                                 /* check status */
7577                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
7578                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7579                                         break;
7580
7581                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7582                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7583
7584                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
7585                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
7586
7587                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7588                                 tmp |= LC_SET_QUIESCE;
7589                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7590
7591                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7592                                 tmp |= LC_REDO_EQ;
7593                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7594
7595                                 mdelay(100);
7596
7597                                 /* linkctl */
7598                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
7599                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7600                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7601                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7602
7603                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7604                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7605                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7606                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7607
7608                                 /* linkctl2 */
7609                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7610                                 tmp16 &= ~((1 << 4) | (7 << 9));
7611                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
7612                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
7613
7614                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7615                                 tmp16 &= ~((1 << 4) | (7 << 9));
7616                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
7617                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7618
7619                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7620                                 tmp &= ~LC_SET_QUIESCE;
7621                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7622                         }
7623                 }
7624         }
7625
7626         /* set the link speed */
7627         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7628         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7629         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7630
7631         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7632         tmp16 &= ~0xf;
7633         if (mask & DRM_PCIE_SPEED_80)
7634                 tmp16 |= 3; /* gen3 */
7635         else if (mask & DRM_PCIE_SPEED_50)
7636                 tmp16 |= 2; /* gen2 */
7637         else
7638                 tmp16 |= 1; /* gen1 */
7639         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7640
7641         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7642         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7643         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7644
7645         for (i = 0; i < rdev->usec_timeout; i++) {
7646                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7647                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7648                         break;
7649                 udelay(1);
7650         }
7651 }
7652
7653 static void si_program_aspm(struct radeon_device *rdev)
7654 {
7655         u32 data, orig;
7656         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7657         bool disable_clkreq = false;
7658
7659         if (radeon_aspm == 0)
7660                 return;
7661
7662         if (!(rdev->flags & RADEON_IS_PCIE))
7663                 return;
7664
7665         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7666         data &= ~LC_XMIT_N_FTS_MASK;
7667         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7668         if (orig != data)
7669                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7670
7671         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7672         data |= LC_GO_TO_RECOVERY;
7673         if (orig != data)
7674                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7675
7676         orig = data = RREG32_PCIE(PCIE_P_CNTL);
7677         data |= P_IGNORE_EDB_ERR;
7678         if (orig != data)
7679                 WREG32_PCIE(PCIE_P_CNTL, data);
7680
7681         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7682         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7683         data |= LC_PMI_TO_L1_DIS;
7684         if (!disable_l0s)
7685                 data |= LC_L0S_INACTIVITY(7);
7686
7687         if (!disable_l1) {
7688                 data |= LC_L1_INACTIVITY(7);
7689                 data &= ~LC_PMI_TO_L1_DIS;
7690                 if (orig != data)
7691                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7692
7693                 if (!disable_plloff_in_l1) {
7694                         bool clk_req_support;
7695
7696                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7697                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7698                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7699                         if (orig != data)
7700                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7701
7702                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7703                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7704                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7705                         if (orig != data)
7706                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7707
7708                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7709                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7710                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7711                         if (orig != data)
7712                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7713
7714                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7715                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7716                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7717                         if (orig != data)
7718                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7719
7720                         if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7721                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7722                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7723                                 if (orig != data)
7724                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7725
7726                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7727                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7728                                 if (orig != data)
7729                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7730
7731                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7732                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7733                                 if (orig != data)
7734                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7735
7736                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7737                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7738                                 if (orig != data)
7739                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7740
7741                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7742                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7743                                 if (orig != data)
7744                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7745
7746                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7747                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7748                                 if (orig != data)
7749                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7750
7751                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7752                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7753                                 if (orig != data)
7754                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7755
7756                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7757                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7758                                 if (orig != data)
7759                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7760                         }
7761                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7762                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7763                         data |= LC_DYN_LANES_PWR_STATE(3);
7764                         if (orig != data)
7765                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7766
7767                         orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7768                         data &= ~LS2_EXIT_TIME_MASK;
7769                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7770                                 data |= LS2_EXIT_TIME(5);
7771                         if (orig != data)
7772                                 WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7773
7774                         orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7775                         data &= ~LS2_EXIT_TIME_MASK;
7776                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7777                                 data |= LS2_EXIT_TIME(5);
7778                         if (orig != data)
7779                                 WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7780
7781                         if (!disable_clkreq &&
7782                             !pci_is_root_bus(rdev->pdev->bus)) {
7783                                 struct pci_dev *root = rdev->pdev->bus->self;
7784                                 u32 lnkcap;
7785
7786                                 clk_req_support = false;
7787                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7788                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7789                                         clk_req_support = true;
7790                         } else {
7791                                 clk_req_support = false;
7792                         }
7793
7794                         if (clk_req_support) {
7795                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7796                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7797                                 if (orig != data)
7798                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7799
7800                                 orig = data = RREG32(THM_CLK_CNTL);
7801                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7802                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7803                                 if (orig != data)
7804                                         WREG32(THM_CLK_CNTL, data);
7805
7806                                 orig = data = RREG32(MISC_CLK_CNTL);
7807                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7808                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7809                                 if (orig != data)
7810                                         WREG32(MISC_CLK_CNTL, data);
7811
7812                                 orig = data = RREG32(CG_CLKPIN_CNTL);
7813                                 data &= ~BCLK_AS_XCLK;
7814                                 if (orig != data)
7815                                         WREG32(CG_CLKPIN_CNTL, data);
7816
7817                                 orig = data = RREG32(CG_CLKPIN_CNTL_2);
7818                                 data &= ~FORCE_BIF_REFCLK_EN;
7819                                 if (orig != data)
7820                                         WREG32(CG_CLKPIN_CNTL_2, data);
7821
7822                                 orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7823                                 data &= ~MPLL_CLKOUT_SEL_MASK;
7824                                 data |= MPLL_CLKOUT_SEL(4);
7825                                 if (orig != data)
7826                                         WREG32(MPLL_BYPASSCLK_SEL, data);
7827
7828                                 orig = data = RREG32(SPLL_CNTL_MODE);
7829                                 data &= ~SPLL_REFCLK_SEL_MASK;
7830                                 if (orig != data)
7831                                         WREG32(SPLL_CNTL_MODE, data);
7832                         }
7833                 }
7834         } else {
7835                 if (orig != data)
7836                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7837         }
7838
7839         orig = data = RREG32_PCIE(PCIE_CNTL2);
7840         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7841         if (orig != data)
7842                 WREG32_PCIE(PCIE_CNTL2, data);
7843
7844         if (!disable_l0s) {
7845                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7846                 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7847                         data = RREG32_PCIE(PCIE_LC_STATUS1);
7848                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7849                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7850                                 data &= ~LC_L0S_INACTIVITY_MASK;
7851                                 if (orig != data)
7852                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7853                         }
7854                 }
7855         }
7856 }
7857
7858 static int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7859 {
7860         unsigned i;
7861
7862         /* make sure VCEPLL_CTLREQ is deasserted */
7863         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7864
7865         mdelay(10);
7866
7867         /* assert UPLL_CTLREQ */
7868         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
7869
7870         /* wait for CTLACK and CTLACK2 to get asserted */
7871         for (i = 0; i < 100; ++i) {
7872                 uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
7873                 if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
7874                         break;
7875                 mdelay(10);
7876         }
7877
7878         /* deassert UPLL_CTLREQ */
7879         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7880
7881         if (i == 100) {
7882                 DRM_ERROR("Timeout setting UVD clocks!\n");
7883                 return -ETIMEDOUT;
7884         }
7885
7886         return 0;
7887 }
7888
7889 int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
7890 {
7891         unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
7892         int r;
7893
7894         /* bypass evclk and ecclk with bclk */
7895         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7896                      EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
7897                      ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7898
7899         /* put PLL in bypass mode */
7900         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
7901                      ~VCEPLL_BYPASS_EN_MASK);
7902
7903         if (!evclk || !ecclk) {
7904                 /* keep the Bypass mode, put PLL to sleep */
7905                 WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7906                              ~VCEPLL_SLEEP_MASK);
7907                 return 0;
7908         }
7909
7910         r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
7911                                           16384, 0x03FFFFFF, 0, 128, 5,
7912                                           &fb_div, &evclk_div, &ecclk_div);
7913         if (r)
7914                 return r;
7915
7916         /* set RESET_ANTI_MUX to 0 */
7917         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7918
7919         /* set VCO_MODE to 1 */
7920         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
7921                      ~VCEPLL_VCO_MODE_MASK);
7922
7923         /* toggle VCEPLL_SLEEP to 1 then back to 0 */
7924         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7925                      ~VCEPLL_SLEEP_MASK);
7926         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);
7927
7928         /* deassert VCEPLL_RESET */
7929         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7930
7931         mdelay(1);
7932
7933         r = si_vce_send_vcepll_ctlreq(rdev);
7934         if (r)
7935                 return r;
7936
7937         /* assert VCEPLL_RESET again */
7938         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);
7939
7940         /* disable spread spectrum. */
7941         WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7942
7943         /* set feedback divider */
7944         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);
7945
7946         /* set ref divider to 0 */
7947         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);
7948
7949         /* set PDIV_A and PDIV_B */
7950         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7951                      VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
7952                      ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));
7953
7954         /* give the PLL some time to settle */
7955         mdelay(15);
7956
7957         /* deassert PLL_RESET */
7958         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7959
7960         mdelay(15);
7961
7962         /* switch from bypass mode to normal mode */
7963         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);
7964
7965         r = si_vce_send_vcepll_ctlreq(rdev);
7966         if (r)
7967                 return r;
7968
7969         /* switch VCLK and DCLK selection */
7970         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7971                      EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
7972                      ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7973
7974         mdelay(100);
7975
7976         return 0;
7977 }