drm/radeon: dump full IB if we hit a packet error
drivers/gpu/drm/radeon/si.c
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include <drm/radeon_drm.h>
32 #include "sid.h"
33 #include "atom.h"
34 #include "si_blit_shaders.h"
35 #include "clearstate_si.h"
36 #include "radeon_ucode.h"
37
38
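/*
 * Two firmware naming schemes are declared for each ASIC below: the legacy
 * uppercase names and the newer lowercase names shipped in current
 * linux-firmware.  si_init_microcode() will fall back from one naming
 * scheme to the other at load time, so only one set needs to be present.
 */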
39 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
40 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
41 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
42 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
43 MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
44 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
45 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
46
47 MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
48 MODULE_FIRMWARE("radeon/tahiti_me.bin");
49 MODULE_FIRMWARE("radeon/tahiti_ce.bin");
50 MODULE_FIRMWARE("radeon/tahiti_mc.bin");
51 MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
52 MODULE_FIRMWARE("radeon/tahiti_smc.bin");
53
54 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
55 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
56 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
57 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
58 MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
59 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
60 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
61
62 MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
63 MODULE_FIRMWARE("radeon/pitcairn_me.bin");
64 MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
65 MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
66 MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
67 MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
68
69 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
70 MODULE_FIRMWARE("radeon/VERDE_me.bin");
71 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
72 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
73 MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
74 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
75 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
76
77 MODULE_FIRMWARE("radeon/verde_pfp.bin");
78 MODULE_FIRMWARE("radeon/verde_me.bin");
79 MODULE_FIRMWARE("radeon/verde_ce.bin");
80 MODULE_FIRMWARE("radeon/verde_mc.bin");
81 MODULE_FIRMWARE("radeon/verde_rlc.bin");
82 MODULE_FIRMWARE("radeon/verde_smc.bin");
83
84 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
85 MODULE_FIRMWARE("radeon/OLAND_me.bin");
86 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
87 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
88 MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
89 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
90 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
91
92 MODULE_FIRMWARE("radeon/oland_pfp.bin");
93 MODULE_FIRMWARE("radeon/oland_me.bin");
94 MODULE_FIRMWARE("radeon/oland_ce.bin");
95 MODULE_FIRMWARE("radeon/oland_mc.bin");
96 MODULE_FIRMWARE("radeon/oland_rlc.bin");
97 MODULE_FIRMWARE("radeon/oland_smc.bin");
98
99 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
100 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
101 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
102 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
103 MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
104 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
105 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
106
107 MODULE_FIRMWARE("radeon/hainan_pfp.bin");
108 MODULE_FIRMWARE("radeon/hainan_me.bin");
109 MODULE_FIRMWARE("radeon/hainan_ce.bin");
110 MODULE_FIRMWARE("radeon/hainan_mc.bin");
111 MODULE_FIRMWARE("radeon/hainan_rlc.bin");
112 MODULE_FIRMWARE("radeon/hainan_smc.bin");
113
114 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
115 static void si_pcie_gen3_enable(struct radeon_device *rdev);
116 static void si_program_aspm(struct radeon_device *rdev);
117 extern void sumo_rlc_fini(struct radeon_device *rdev);
118 extern int sumo_rlc_init(struct radeon_device *rdev);
119 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
120 extern void r600_ih_ring_fini(struct radeon_device *rdev);
121 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
122 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
123 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
124 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
125 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
126 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
127 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
128                                          bool enable);
129 static void si_init_pg(struct radeon_device *rdev);
130 static void si_init_cg(struct radeon_device *rdev);
131 static void si_fini_pg(struct radeon_device *rdev);
132 static void si_fini_cg(struct radeon_device *rdev);
133 static void si_rlc_stop(struct radeon_device *rdev);
134
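/*
 * RLC save/restore list for Verde.  Each entry below appears to be a pair:
 * a control dword of the form (select << 16) | (register byte offset >> 2)
 * followed by an initial value (0 here); the bare 0x3 mid-list and the
 * trailing 0 look like markers consumed by the RLC microcode rather than
 * register entries.  The exact encoding is owned by the RLC ucode, so treat
 * this description as informational only.
 */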
135 static const u32 verde_rlc_save_restore_register_list[] =
136 {
137         (0x8000 << 16) | (0x98f4 >> 2),
138         0x00000000,
139         (0x8040 << 16) | (0x98f4 >> 2),
140         0x00000000,
141         (0x8000 << 16) | (0xe80 >> 2),
142         0x00000000,
143         (0x8040 << 16) | (0xe80 >> 2),
144         0x00000000,
145         (0x8000 << 16) | (0x89bc >> 2),
146         0x00000000,
147         (0x8040 << 16) | (0x89bc >> 2),
148         0x00000000,
149         (0x8000 << 16) | (0x8c1c >> 2),
150         0x00000000,
151         (0x8040 << 16) | (0x8c1c >> 2),
152         0x00000000,
153         (0x9c00 << 16) | (0x98f0 >> 2),
154         0x00000000,
155         (0x9c00 << 16) | (0xe7c >> 2),
156         0x00000000,
157         (0x8000 << 16) | (0x9148 >> 2),
158         0x00000000,
159         (0x8040 << 16) | (0x9148 >> 2),
160         0x00000000,
161         (0x9c00 << 16) | (0x9150 >> 2),
162         0x00000000,
163         (0x9c00 << 16) | (0x897c >> 2),
164         0x00000000,
165         (0x9c00 << 16) | (0x8d8c >> 2),
166         0x00000000,
167         (0x9c00 << 16) | (0xac54 >> 2),
168         0x00000000,
169         0x3,
170         (0x9c00 << 16) | (0x98f8 >> 2),
171         0x00000000,
172         (0x9c00 << 16) | (0x9910 >> 2),
173         0x00000000,
174         (0x9c00 << 16) | (0x9914 >> 2),
175         0x00000000,
176         (0x9c00 << 16) | (0x9918 >> 2),
177         0x00000000,
178         (0x9c00 << 16) | (0x991c >> 2),
179         0x00000000,
180         (0x9c00 << 16) | (0x9920 >> 2),
181         0x00000000,
182         (0x9c00 << 16) | (0x9924 >> 2),
183         0x00000000,
184         (0x9c00 << 16) | (0x9928 >> 2),
185         0x00000000,
186         (0x9c00 << 16) | (0x992c >> 2),
187         0x00000000,
188         (0x9c00 << 16) | (0x9930 >> 2),
189         0x00000000,
190         (0x9c00 << 16) | (0x9934 >> 2),
191         0x00000000,
192         (0x9c00 << 16) | (0x9938 >> 2),
193         0x00000000,
194         (0x9c00 << 16) | (0x993c >> 2),
195         0x00000000,
196         (0x9c00 << 16) | (0x9940 >> 2),
197         0x00000000,
198         (0x9c00 << 16) | (0x9944 >> 2),
199         0x00000000,
200         (0x9c00 << 16) | (0x9948 >> 2),
201         0x00000000,
202         (0x9c00 << 16) | (0x994c >> 2),
203         0x00000000,
204         (0x9c00 << 16) | (0x9950 >> 2),
205         0x00000000,
206         (0x9c00 << 16) | (0x9954 >> 2),
207         0x00000000,
208         (0x9c00 << 16) | (0x9958 >> 2),
209         0x00000000,
210         (0x9c00 << 16) | (0x995c >> 2),
211         0x00000000,
212         (0x9c00 << 16) | (0x9960 >> 2),
213         0x00000000,
214         (0x9c00 << 16) | (0x9964 >> 2),
215         0x00000000,
216         (0x9c00 << 16) | (0x9968 >> 2),
217         0x00000000,
218         (0x9c00 << 16) | (0x996c >> 2),
219         0x00000000,
220         (0x9c00 << 16) | (0x9970 >> 2),
221         0x00000000,
222         (0x9c00 << 16) | (0x9974 >> 2),
223         0x00000000,
224         (0x9c00 << 16) | (0x9978 >> 2),
225         0x00000000,
226         (0x9c00 << 16) | (0x997c >> 2),
227         0x00000000,
228         (0x9c00 << 16) | (0x9980 >> 2),
229         0x00000000,
230         (0x9c00 << 16) | (0x9984 >> 2),
231         0x00000000,
232         (0x9c00 << 16) | (0x9988 >> 2),
233         0x00000000,
234         (0x9c00 << 16) | (0x998c >> 2),
235         0x00000000,
236         (0x9c00 << 16) | (0x8c00 >> 2),
237         0x00000000,
238         (0x9c00 << 16) | (0x8c14 >> 2),
239         0x00000000,
240         (0x9c00 << 16) | (0x8c04 >> 2),
241         0x00000000,
242         (0x9c00 << 16) | (0x8c08 >> 2),
243         0x00000000,
244         (0x8000 << 16) | (0x9b7c >> 2),
245         0x00000000,
246         (0x8040 << 16) | (0x9b7c >> 2),
247         0x00000000,
248         (0x8000 << 16) | (0xe84 >> 2),
249         0x00000000,
250         (0x8040 << 16) | (0xe84 >> 2),
251         0x00000000,
252         (0x8000 << 16) | (0x89c0 >> 2),
253         0x00000000,
254         (0x8040 << 16) | (0x89c0 >> 2),
255         0x00000000,
256         (0x8000 << 16) | (0x914c >> 2),
257         0x00000000,
258         (0x8040 << 16) | (0x914c >> 2),
259         0x00000000,
260         (0x8000 << 16) | (0x8c20 >> 2),
261         0x00000000,
262         (0x8040 << 16) | (0x8c20 >> 2),
263         0x00000000,
264         (0x8000 << 16) | (0x9354 >> 2),
265         0x00000000,
266         (0x8040 << 16) | (0x9354 >> 2),
267         0x00000000,
268         (0x9c00 << 16) | (0x9060 >> 2),
269         0x00000000,
270         (0x9c00 << 16) | (0x9364 >> 2),
271         0x00000000,
272         (0x9c00 << 16) | (0x9100 >> 2),
273         0x00000000,
274         (0x9c00 << 16) | (0x913c >> 2),
275         0x00000000,
276         (0x8000 << 16) | (0x90e0 >> 2),
277         0x00000000,
278         (0x8000 << 16) | (0x90e4 >> 2),
279         0x00000000,
280         (0x8000 << 16) | (0x90e8 >> 2),
281         0x00000000,
282         (0x8040 << 16) | (0x90e0 >> 2),
283         0x00000000,
284         (0x8040 << 16) | (0x90e4 >> 2),
285         0x00000000,
286         (0x8040 << 16) | (0x90e8 >> 2),
287         0x00000000,
288         (0x9c00 << 16) | (0x8bcc >> 2),
289         0x00000000,
290         (0x9c00 << 16) | (0x8b24 >> 2),
291         0x00000000,
292         (0x9c00 << 16) | (0x88c4 >> 2),
293         0x00000000,
294         (0x9c00 << 16) | (0x8e50 >> 2),
295         0x00000000,
296         (0x9c00 << 16) | (0x8c0c >> 2),
297         0x00000000,
298         (0x9c00 << 16) | (0x8e58 >> 2),
299         0x00000000,
300         (0x9c00 << 16) | (0x8e5c >> 2),
301         0x00000000,
302         (0x9c00 << 16) | (0x9508 >> 2),
303         0x00000000,
304         (0x9c00 << 16) | (0x950c >> 2),
305         0x00000000,
306         (0x9c00 << 16) | (0x9494 >> 2),
307         0x00000000,
308         (0x9c00 << 16) | (0xac0c >> 2),
309         0x00000000,
310         (0x9c00 << 16) | (0xac10 >> 2),
311         0x00000000,
312         (0x9c00 << 16) | (0xac14 >> 2),
313         0x00000000,
314         (0x9c00 << 16) | (0xae00 >> 2),
315         0x00000000,
316         (0x9c00 << 16) | (0xac08 >> 2),
317         0x00000000,
318         (0x9c00 << 16) | (0x88d4 >> 2),
319         0x00000000,
320         (0x9c00 << 16) | (0x88c8 >> 2),
321         0x00000000,
322         (0x9c00 << 16) | (0x88cc >> 2),
323         0x00000000,
324         (0x9c00 << 16) | (0x89b0 >> 2),
325         0x00000000,
326         (0x9c00 << 16) | (0x8b10 >> 2),
327         0x00000000,
328         (0x9c00 << 16) | (0x8a14 >> 2),
329         0x00000000,
330         (0x9c00 << 16) | (0x9830 >> 2),
331         0x00000000,
332         (0x9c00 << 16) | (0x9834 >> 2),
333         0x00000000,
334         (0x9c00 << 16) | (0x9838 >> 2),
335         0x00000000,
336         (0x9c00 << 16) | (0x9a10 >> 2),
337         0x00000000,
338         (0x8000 << 16) | (0x9870 >> 2),
339         0x00000000,
340         (0x8000 << 16) | (0x9874 >> 2),
341         0x00000000,
342         (0x8001 << 16) | (0x9870 >> 2),
343         0x00000000,
344         (0x8001 << 16) | (0x9874 >> 2),
345         0x00000000,
346         (0x8040 << 16) | (0x9870 >> 2),
347         0x00000000,
348         (0x8040 << 16) | (0x9874 >> 2),
349         0x00000000,
350         (0x8041 << 16) | (0x9870 >> 2),
351         0x00000000,
352         (0x8041 << 16) | (0x9874 >> 2),
353         0x00000000,
354         0x00000000
355 };
356
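/*
 * The *_golden_* tables below are {register offset, and_mask, or_mask}
 * triples consumed by radeon_program_register_sequence() (see
 * si_init_golden_registers() further down): a mask of 0xffffffff writes the
 * value directly, anything else results in a read-modify-write of only the
 * masked bits.
 */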
357 static const u32 tahiti_golden_rlc_registers[] =
358 {
359         0xc424, 0xffffffff, 0x00601005,
360         0xc47c, 0xffffffff, 0x10104040,
361         0xc488, 0xffffffff, 0x0100000a,
362         0xc314, 0xffffffff, 0x00000800,
363         0xc30c, 0xffffffff, 0x800000f4,
364         0xf4a8, 0xffffffff, 0x00000000
365 };
366
367 static const u32 tahiti_golden_registers[] =
368 {
369         0x9a10, 0x00010000, 0x00018208,
370         0x9830, 0xffffffff, 0x00000000,
371         0x9834, 0xf00fffff, 0x00000400,
372         0x9838, 0x0002021c, 0x00020200,
373         0xc78, 0x00000080, 0x00000000,
374         0xd030, 0x000300c0, 0x00800040,
375         0xd830, 0x000300c0, 0x00800040,
376         0x5bb0, 0x000000f0, 0x00000070,
377         0x5bc0, 0x00200000, 0x50100000,
378         0x7030, 0x31000311, 0x00000011,
379         0x277c, 0x00000003, 0x000007ff,
380         0x240c, 0x000007ff, 0x00000000,
381         0x8a14, 0xf000001f, 0x00000007,
382         0x8b24, 0xffffffff, 0x00ffffff,
383         0x8b10, 0x0000ff0f, 0x00000000,
384         0x28a4c, 0x07ffffff, 0x4e000000,
385         0x28350, 0x3f3f3fff, 0x2a00126a,
386         0x30, 0x000000ff, 0x0040,
387         0x34, 0x00000040, 0x00004040,
388         0x9100, 0x07ffffff, 0x03000000,
389         0x8e88, 0x01ff1f3f, 0x00000000,
390         0x8e84, 0x01ff1f3f, 0x00000000,
391         0x9060, 0x0000007f, 0x00000020,
392         0x9508, 0x00010000, 0x00010000,
393         0xac14, 0x00000200, 0x000002fb,
394         0xac10, 0xffffffff, 0x0000543b,
395         0xac0c, 0xffffffff, 0xa9210876,
396         0x88d0, 0xffffffff, 0x000fff40,
397         0x88d4, 0x0000001f, 0x00000010,
398         0x1410, 0x20000000, 0x20fffed8,
399         0x15c0, 0x000c0fc0, 0x000c0400
400 };
401
402 static const u32 tahiti_golden_registers2[] =
403 {
404         0xc64, 0x00000001, 0x00000001
405 };
406
407 static const u32 pitcairn_golden_rlc_registers[] =
408 {
409         0xc424, 0xffffffff, 0x00601004,
410         0xc47c, 0xffffffff, 0x10102020,
411         0xc488, 0xffffffff, 0x01000020,
412         0xc314, 0xffffffff, 0x00000800,
413         0xc30c, 0xffffffff, 0x800000a4
414 };
415
416 static const u32 pitcairn_golden_registers[] =
417 {
418         0x9a10, 0x00010000, 0x00018208,
419         0x9830, 0xffffffff, 0x00000000,
420         0x9834, 0xf00fffff, 0x00000400,
421         0x9838, 0x0002021c, 0x00020200,
422         0xc78, 0x00000080, 0x00000000,
423         0xd030, 0x000300c0, 0x00800040,
424         0xd830, 0x000300c0, 0x00800040,
425         0x5bb0, 0x000000f0, 0x00000070,
426         0x5bc0, 0x00200000, 0x50100000,
427         0x7030, 0x31000311, 0x00000011,
428         0x2ae4, 0x00073ffe, 0x000022a2,
429         0x240c, 0x000007ff, 0x00000000,
430         0x8a14, 0xf000001f, 0x00000007,
431         0x8b24, 0xffffffff, 0x00ffffff,
432         0x8b10, 0x0000ff0f, 0x00000000,
433         0x28a4c, 0x07ffffff, 0x4e000000,
434         0x28350, 0x3f3f3fff, 0x2a00126a,
435         0x30, 0x000000ff, 0x0040,
436         0x34, 0x00000040, 0x00004040,
437         0x9100, 0x07ffffff, 0x03000000,
438         0x9060, 0x0000007f, 0x00000020,
439         0x9508, 0x00010000, 0x00010000,
440         0xac14, 0x000003ff, 0x000000f7,
441         0xac10, 0xffffffff, 0x00000000,
442         0xac0c, 0xffffffff, 0x32761054,
443         0x88d4, 0x0000001f, 0x00000010,
444         0x15c0, 0x000c0fc0, 0x000c0400
445 };
446
447 static const u32 verde_golden_rlc_registers[] =
448 {
449         0xc424, 0xffffffff, 0x033f1005,
450         0xc47c, 0xffffffff, 0x10808020,
451         0xc488, 0xffffffff, 0x00800008,
452         0xc314, 0xffffffff, 0x00001000,
453         0xc30c, 0xffffffff, 0x80010014
454 };
455
456 static const u32 verde_golden_registers[] =
457 {
458         0x9a10, 0x00010000, 0x00018208,
459         0x9830, 0xffffffff, 0x00000000,
460         0x9834, 0xf00fffff, 0x00000400,
461         0x9838, 0x0002021c, 0x00020200,
462         0xc78, 0x00000080, 0x00000000,
463         0xd030, 0x000300c0, 0x00800040,
464         0xd030, 0x000300c0, 0x00800040,
465         0xd830, 0x000300c0, 0x00800040,
466         0xd830, 0x000300c0, 0x00800040,
467         0x5bb0, 0x000000f0, 0x00000070,
468         0x5bc0, 0x00200000, 0x50100000,
469         0x7030, 0x31000311, 0x00000011,
470         0x2ae4, 0x00073ffe, 0x000022a2,
471         0x2ae4, 0x00073ffe, 0x000022a2,
472         0x2ae4, 0x00073ffe, 0x000022a2,
473         0x240c, 0x000007ff, 0x00000000,
474         0x240c, 0x000007ff, 0x00000000,
475         0x240c, 0x000007ff, 0x00000000,
476         0x8a14, 0xf000001f, 0x00000007,
477         0x8a14, 0xf000001f, 0x00000007,
478         0x8a14, 0xf000001f, 0x00000007,
479         0x8b24, 0xffffffff, 0x00ffffff,
480         0x8b10, 0x0000ff0f, 0x00000000,
481         0x28a4c, 0x07ffffff, 0x4e000000,
482         0x28350, 0x3f3f3fff, 0x0000124a,
483         0x28350, 0x3f3f3fff, 0x0000124a,
484         0x28350, 0x3f3f3fff, 0x0000124a,
485         0x30, 0x000000ff, 0x0040,
486         0x34, 0x00000040, 0x00004040,
487         0x9100, 0x07ffffff, 0x03000000,
488         0x9100, 0x07ffffff, 0x03000000,
489         0x8e88, 0x01ff1f3f, 0x00000000,
490         0x8e88, 0x01ff1f3f, 0x00000000,
491         0x8e88, 0x01ff1f3f, 0x00000000,
492         0x8e84, 0x01ff1f3f, 0x00000000,
493         0x8e84, 0x01ff1f3f, 0x00000000,
494         0x8e84, 0x01ff1f3f, 0x00000000,
495         0x9060, 0x0000007f, 0x00000020,
496         0x9508, 0x00010000, 0x00010000,
497         0xac14, 0x000003ff, 0x00000003,
498         0xac14, 0x000003ff, 0x00000003,
499         0xac14, 0x000003ff, 0x00000003,
500         0xac10, 0xffffffff, 0x00000000,
501         0xac10, 0xffffffff, 0x00000000,
502         0xac10, 0xffffffff, 0x00000000,
503         0xac0c, 0xffffffff, 0x00001032,
504         0xac0c, 0xffffffff, 0x00001032,
505         0xac0c, 0xffffffff, 0x00001032,
506         0x88d4, 0x0000001f, 0x00000010,
507         0x88d4, 0x0000001f, 0x00000010,
508         0x88d4, 0x0000001f, 0x00000010,
509         0x15c0, 0x000c0fc0, 0x000c0400
510 };
511
512 static const u32 oland_golden_rlc_registers[] =
513 {
514         0xc424, 0xffffffff, 0x00601005,
515         0xc47c, 0xffffffff, 0x10104040,
516         0xc488, 0xffffffff, 0x0100000a,
517         0xc314, 0xffffffff, 0x00000800,
518         0xc30c, 0xffffffff, 0x800000f4
519 };
520
521 static const u32 oland_golden_registers[] =
522 {
523         0x9a10, 0x00010000, 0x00018208,
524         0x9830, 0xffffffff, 0x00000000,
525         0x9834, 0xf00fffff, 0x00000400,
526         0x9838, 0x0002021c, 0x00020200,
527         0xc78, 0x00000080, 0x00000000,
528         0xd030, 0x000300c0, 0x00800040,
529         0xd830, 0x000300c0, 0x00800040,
530         0x5bb0, 0x000000f0, 0x00000070,
531         0x5bc0, 0x00200000, 0x50100000,
532         0x7030, 0x31000311, 0x00000011,
533         0x2ae4, 0x00073ffe, 0x000022a2,
534         0x240c, 0x000007ff, 0x00000000,
535         0x8a14, 0xf000001f, 0x00000007,
536         0x8b24, 0xffffffff, 0x00ffffff,
537         0x8b10, 0x0000ff0f, 0x00000000,
538         0x28a4c, 0x07ffffff, 0x4e000000,
539         0x28350, 0x3f3f3fff, 0x00000082,
540         0x30, 0x000000ff, 0x0040,
541         0x34, 0x00000040, 0x00004040,
542         0x9100, 0x07ffffff, 0x03000000,
543         0x9060, 0x0000007f, 0x00000020,
544         0x9508, 0x00010000, 0x00010000,
545         0xac14, 0x000003ff, 0x000000f3,
546         0xac10, 0xffffffff, 0x00000000,
547         0xac0c, 0xffffffff, 0x00003210,
548         0x88d4, 0x0000001f, 0x00000010,
549         0x15c0, 0x000c0fc0, 0x000c0400
550 };
551
552 static const u32 hainan_golden_registers[] =
553 {
554         0x9a10, 0x00010000, 0x00018208,
555         0x9830, 0xffffffff, 0x00000000,
556         0x9834, 0xf00fffff, 0x00000400,
557         0x9838, 0x0002021c, 0x00020200,
558         0xd0c0, 0xff000fff, 0x00000100,
559         0xd030, 0x000300c0, 0x00800040,
560         0xd8c0, 0xff000fff, 0x00000100,
561         0xd830, 0x000300c0, 0x00800040,
562         0x2ae4, 0x00073ffe, 0x000022a2,
563         0x240c, 0x000007ff, 0x00000000,
564         0x8a14, 0xf000001f, 0x00000007,
565         0x8b24, 0xffffffff, 0x00ffffff,
566         0x8b10, 0x0000ff0f, 0x00000000,
567         0x28a4c, 0x07ffffff, 0x4e000000,
568         0x28350, 0x3f3f3fff, 0x00000000,
569         0x30, 0x000000ff, 0x0040,
570         0x34, 0x00000040, 0x00004040,
571         0x9100, 0x03e00000, 0x03600000,
572         0x9060, 0x0000007f, 0x00000020,
573         0x9508, 0x00010000, 0x00010000,
574         0xac14, 0x000003ff, 0x000000f1,
575         0xac10, 0xffffffff, 0x00000000,
576         0xac0c, 0xffffffff, 0x00003210,
577         0x88d4, 0x0000001f, 0x00000010,
578         0x15c0, 0x000c0fc0, 0x000c0400
579 };
580
581 static const u32 hainan_golden_registers2[] =
582 {
583         0x98f8, 0xffffffff, 0x02010001
584 };
585
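/*
 * Per-ASIC medium grain / coarse grain clock gating (MGCG/CGCG) init
 * sequences, in the same {offset, mask, value} triple format as the golden
 * register tables above.
 */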
586 static const u32 tahiti_mgcg_cgcg_init[] =
587 {
588         0xc400, 0xffffffff, 0xfffffffc,
589         0x802c, 0xffffffff, 0xe0000000,
590         0x9a60, 0xffffffff, 0x00000100,
591         0x92a4, 0xffffffff, 0x00000100,
592         0xc164, 0xffffffff, 0x00000100,
593         0x9774, 0xffffffff, 0x00000100,
594         0x8984, 0xffffffff, 0x06000100,
595         0x8a18, 0xffffffff, 0x00000100,
596         0x92a0, 0xffffffff, 0x00000100,
597         0xc380, 0xffffffff, 0x00000100,
598         0x8b28, 0xffffffff, 0x00000100,
599         0x9144, 0xffffffff, 0x00000100,
600         0x8d88, 0xffffffff, 0x00000100,
601         0x8d8c, 0xffffffff, 0x00000100,
602         0x9030, 0xffffffff, 0x00000100,
603         0x9034, 0xffffffff, 0x00000100,
604         0x9038, 0xffffffff, 0x00000100,
605         0x903c, 0xffffffff, 0x00000100,
606         0xad80, 0xffffffff, 0x00000100,
607         0xac54, 0xffffffff, 0x00000100,
608         0x897c, 0xffffffff, 0x06000100,
609         0x9868, 0xffffffff, 0x00000100,
610         0x9510, 0xffffffff, 0x00000100,
611         0xaf04, 0xffffffff, 0x00000100,
612         0xae04, 0xffffffff, 0x00000100,
613         0x949c, 0xffffffff, 0x00000100,
614         0x802c, 0xffffffff, 0xe0000000,
615         0x9160, 0xffffffff, 0x00010000,
616         0x9164, 0xffffffff, 0x00030002,
617         0x9168, 0xffffffff, 0x00040007,
618         0x916c, 0xffffffff, 0x00060005,
619         0x9170, 0xffffffff, 0x00090008,
620         0x9174, 0xffffffff, 0x00020001,
621         0x9178, 0xffffffff, 0x00040003,
622         0x917c, 0xffffffff, 0x00000007,
623         0x9180, 0xffffffff, 0x00060005,
624         0x9184, 0xffffffff, 0x00090008,
625         0x9188, 0xffffffff, 0x00030002,
626         0x918c, 0xffffffff, 0x00050004,
627         0x9190, 0xffffffff, 0x00000008,
628         0x9194, 0xffffffff, 0x00070006,
629         0x9198, 0xffffffff, 0x000a0009,
630         0x919c, 0xffffffff, 0x00040003,
631         0x91a0, 0xffffffff, 0x00060005,
632         0x91a4, 0xffffffff, 0x00000009,
633         0x91a8, 0xffffffff, 0x00080007,
634         0x91ac, 0xffffffff, 0x000b000a,
635         0x91b0, 0xffffffff, 0x00050004,
636         0x91b4, 0xffffffff, 0x00070006,
637         0x91b8, 0xffffffff, 0x0008000b,
638         0x91bc, 0xffffffff, 0x000a0009,
639         0x91c0, 0xffffffff, 0x000d000c,
640         0x91c4, 0xffffffff, 0x00060005,
641         0x91c8, 0xffffffff, 0x00080007,
642         0x91cc, 0xffffffff, 0x0000000b,
643         0x91d0, 0xffffffff, 0x000a0009,
644         0x91d4, 0xffffffff, 0x000d000c,
645         0x91d8, 0xffffffff, 0x00070006,
646         0x91dc, 0xffffffff, 0x00090008,
647         0x91e0, 0xffffffff, 0x0000000c,
648         0x91e4, 0xffffffff, 0x000b000a,
649         0x91e8, 0xffffffff, 0x000e000d,
650         0x91ec, 0xffffffff, 0x00080007,
651         0x91f0, 0xffffffff, 0x000a0009,
652         0x91f4, 0xffffffff, 0x0000000d,
653         0x91f8, 0xffffffff, 0x000c000b,
654         0x91fc, 0xffffffff, 0x000f000e,
655         0x9200, 0xffffffff, 0x00090008,
656         0x9204, 0xffffffff, 0x000b000a,
657         0x9208, 0xffffffff, 0x000c000f,
658         0x920c, 0xffffffff, 0x000e000d,
659         0x9210, 0xffffffff, 0x00110010,
660         0x9214, 0xffffffff, 0x000a0009,
661         0x9218, 0xffffffff, 0x000c000b,
662         0x921c, 0xffffffff, 0x0000000f,
663         0x9220, 0xffffffff, 0x000e000d,
664         0x9224, 0xffffffff, 0x00110010,
665         0x9228, 0xffffffff, 0x000b000a,
666         0x922c, 0xffffffff, 0x000d000c,
667         0x9230, 0xffffffff, 0x00000010,
668         0x9234, 0xffffffff, 0x000f000e,
669         0x9238, 0xffffffff, 0x00120011,
670         0x923c, 0xffffffff, 0x000c000b,
671         0x9240, 0xffffffff, 0x000e000d,
672         0x9244, 0xffffffff, 0x00000011,
673         0x9248, 0xffffffff, 0x0010000f,
674         0x924c, 0xffffffff, 0x00130012,
675         0x9250, 0xffffffff, 0x000d000c,
676         0x9254, 0xffffffff, 0x000f000e,
677         0x9258, 0xffffffff, 0x00100013,
678         0x925c, 0xffffffff, 0x00120011,
679         0x9260, 0xffffffff, 0x00150014,
680         0x9264, 0xffffffff, 0x000e000d,
681         0x9268, 0xffffffff, 0x0010000f,
682         0x926c, 0xffffffff, 0x00000013,
683         0x9270, 0xffffffff, 0x00120011,
684         0x9274, 0xffffffff, 0x00150014,
685         0x9278, 0xffffffff, 0x000f000e,
686         0x927c, 0xffffffff, 0x00110010,
687         0x9280, 0xffffffff, 0x00000014,
688         0x9284, 0xffffffff, 0x00130012,
689         0x9288, 0xffffffff, 0x00160015,
690         0x928c, 0xffffffff, 0x0010000f,
691         0x9290, 0xffffffff, 0x00120011,
692         0x9294, 0xffffffff, 0x00000015,
693         0x9298, 0xffffffff, 0x00140013,
694         0x929c, 0xffffffff, 0x00170016,
695         0x9150, 0xffffffff, 0x96940200,
696         0x8708, 0xffffffff, 0x00900100,
697         0xc478, 0xffffffff, 0x00000080,
698         0xc404, 0xffffffff, 0x0020003f,
699         0x30, 0xffffffff, 0x0000001c,
700         0x34, 0x000f0000, 0x000f0000,
701         0x160c, 0xffffffff, 0x00000100,
702         0x1024, 0xffffffff, 0x00000100,
703         0x102c, 0x00000101, 0x00000000,
704         0x20a8, 0xffffffff, 0x00000104,
705         0x264c, 0x000c0000, 0x000c0000,
706         0x2648, 0x000c0000, 0x000c0000,
707         0x55e4, 0xff000fff, 0x00000100,
708         0x55e8, 0x00000001, 0x00000001,
709         0x2f50, 0x00000001, 0x00000001,
710         0x30cc, 0xc0000fff, 0x00000104,
711         0xc1e4, 0x00000001, 0x00000001,
712         0xd0c0, 0xfffffff0, 0x00000100,
713         0xd8c0, 0xfffffff0, 0x00000100
714 };
715
716 static const u32 pitcairn_mgcg_cgcg_init[] =
717 {
718         0xc400, 0xffffffff, 0xfffffffc,
719         0x802c, 0xffffffff, 0xe0000000,
720         0x9a60, 0xffffffff, 0x00000100,
721         0x92a4, 0xffffffff, 0x00000100,
722         0xc164, 0xffffffff, 0x00000100,
723         0x9774, 0xffffffff, 0x00000100,
724         0x8984, 0xffffffff, 0x06000100,
725         0x8a18, 0xffffffff, 0x00000100,
726         0x92a0, 0xffffffff, 0x00000100,
727         0xc380, 0xffffffff, 0x00000100,
728         0x8b28, 0xffffffff, 0x00000100,
729         0x9144, 0xffffffff, 0x00000100,
730         0x8d88, 0xffffffff, 0x00000100,
731         0x8d8c, 0xffffffff, 0x00000100,
732         0x9030, 0xffffffff, 0x00000100,
733         0x9034, 0xffffffff, 0x00000100,
734         0x9038, 0xffffffff, 0x00000100,
735         0x903c, 0xffffffff, 0x00000100,
736         0xad80, 0xffffffff, 0x00000100,
737         0xac54, 0xffffffff, 0x00000100,
738         0x897c, 0xffffffff, 0x06000100,
739         0x9868, 0xffffffff, 0x00000100,
740         0x9510, 0xffffffff, 0x00000100,
741         0xaf04, 0xffffffff, 0x00000100,
742         0xae04, 0xffffffff, 0x00000100,
743         0x949c, 0xffffffff, 0x00000100,
744         0x802c, 0xffffffff, 0xe0000000,
745         0x9160, 0xffffffff, 0x00010000,
746         0x9164, 0xffffffff, 0x00030002,
747         0x9168, 0xffffffff, 0x00040007,
748         0x916c, 0xffffffff, 0x00060005,
749         0x9170, 0xffffffff, 0x00090008,
750         0x9174, 0xffffffff, 0x00020001,
751         0x9178, 0xffffffff, 0x00040003,
752         0x917c, 0xffffffff, 0x00000007,
753         0x9180, 0xffffffff, 0x00060005,
754         0x9184, 0xffffffff, 0x00090008,
755         0x9188, 0xffffffff, 0x00030002,
756         0x918c, 0xffffffff, 0x00050004,
757         0x9190, 0xffffffff, 0x00000008,
758         0x9194, 0xffffffff, 0x00070006,
759         0x9198, 0xffffffff, 0x000a0009,
760         0x919c, 0xffffffff, 0x00040003,
761         0x91a0, 0xffffffff, 0x00060005,
762         0x91a4, 0xffffffff, 0x00000009,
763         0x91a8, 0xffffffff, 0x00080007,
764         0x91ac, 0xffffffff, 0x000b000a,
765         0x91b0, 0xffffffff, 0x00050004,
766         0x91b4, 0xffffffff, 0x00070006,
767         0x91b8, 0xffffffff, 0x0008000b,
768         0x91bc, 0xffffffff, 0x000a0009,
769         0x91c0, 0xffffffff, 0x000d000c,
770         0x9200, 0xffffffff, 0x00090008,
771         0x9204, 0xffffffff, 0x000b000a,
772         0x9208, 0xffffffff, 0x000c000f,
773         0x920c, 0xffffffff, 0x000e000d,
774         0x9210, 0xffffffff, 0x00110010,
775         0x9214, 0xffffffff, 0x000a0009,
776         0x9218, 0xffffffff, 0x000c000b,
777         0x921c, 0xffffffff, 0x0000000f,
778         0x9220, 0xffffffff, 0x000e000d,
779         0x9224, 0xffffffff, 0x00110010,
780         0x9228, 0xffffffff, 0x000b000a,
781         0x922c, 0xffffffff, 0x000d000c,
782         0x9230, 0xffffffff, 0x00000010,
783         0x9234, 0xffffffff, 0x000f000e,
784         0x9238, 0xffffffff, 0x00120011,
785         0x923c, 0xffffffff, 0x000c000b,
786         0x9240, 0xffffffff, 0x000e000d,
787         0x9244, 0xffffffff, 0x00000011,
788         0x9248, 0xffffffff, 0x0010000f,
789         0x924c, 0xffffffff, 0x00130012,
790         0x9250, 0xffffffff, 0x000d000c,
791         0x9254, 0xffffffff, 0x000f000e,
792         0x9258, 0xffffffff, 0x00100013,
793         0x925c, 0xffffffff, 0x00120011,
794         0x9260, 0xffffffff, 0x00150014,
795         0x9150, 0xffffffff, 0x96940200,
796         0x8708, 0xffffffff, 0x00900100,
797         0xc478, 0xffffffff, 0x00000080,
798         0xc404, 0xffffffff, 0x0020003f,
799         0x30, 0xffffffff, 0x0000001c,
800         0x34, 0x000f0000, 0x000f0000,
801         0x160c, 0xffffffff, 0x00000100,
802         0x1024, 0xffffffff, 0x00000100,
803         0x102c, 0x00000101, 0x00000000,
804         0x20a8, 0xffffffff, 0x00000104,
805         0x55e4, 0xff000fff, 0x00000100,
806         0x55e8, 0x00000001, 0x00000001,
807         0x2f50, 0x00000001, 0x00000001,
808         0x30cc, 0xc0000fff, 0x00000104,
809         0xc1e4, 0x00000001, 0x00000001,
810         0xd0c0, 0xfffffff0, 0x00000100,
811         0xd8c0, 0xfffffff0, 0x00000100
812 };
813
814 static const u32 verde_mgcg_cgcg_init[] =
815 {
816         0xc400, 0xffffffff, 0xfffffffc,
817         0x802c, 0xffffffff, 0xe0000000,
818         0x9a60, 0xffffffff, 0x00000100,
819         0x92a4, 0xffffffff, 0x00000100,
820         0xc164, 0xffffffff, 0x00000100,
821         0x9774, 0xffffffff, 0x00000100,
822         0x8984, 0xffffffff, 0x06000100,
823         0x8a18, 0xffffffff, 0x00000100,
824         0x92a0, 0xffffffff, 0x00000100,
825         0xc380, 0xffffffff, 0x00000100,
826         0x8b28, 0xffffffff, 0x00000100,
827         0x9144, 0xffffffff, 0x00000100,
828         0x8d88, 0xffffffff, 0x00000100,
829         0x8d8c, 0xffffffff, 0x00000100,
830         0x9030, 0xffffffff, 0x00000100,
831         0x9034, 0xffffffff, 0x00000100,
832         0x9038, 0xffffffff, 0x00000100,
833         0x903c, 0xffffffff, 0x00000100,
834         0xad80, 0xffffffff, 0x00000100,
835         0xac54, 0xffffffff, 0x00000100,
836         0x897c, 0xffffffff, 0x06000100,
837         0x9868, 0xffffffff, 0x00000100,
838         0x9510, 0xffffffff, 0x00000100,
839         0xaf04, 0xffffffff, 0x00000100,
840         0xae04, 0xffffffff, 0x00000100,
841         0x949c, 0xffffffff, 0x00000100,
842         0x802c, 0xffffffff, 0xe0000000,
843         0x9160, 0xffffffff, 0x00010000,
844         0x9164, 0xffffffff, 0x00030002,
845         0x9168, 0xffffffff, 0x00040007,
846         0x916c, 0xffffffff, 0x00060005,
847         0x9170, 0xffffffff, 0x00090008,
848         0x9174, 0xffffffff, 0x00020001,
849         0x9178, 0xffffffff, 0x00040003,
850         0x917c, 0xffffffff, 0x00000007,
851         0x9180, 0xffffffff, 0x00060005,
852         0x9184, 0xffffffff, 0x00090008,
853         0x9188, 0xffffffff, 0x00030002,
854         0x918c, 0xffffffff, 0x00050004,
855         0x9190, 0xffffffff, 0x00000008,
856         0x9194, 0xffffffff, 0x00070006,
857         0x9198, 0xffffffff, 0x000a0009,
858         0x919c, 0xffffffff, 0x00040003,
859         0x91a0, 0xffffffff, 0x00060005,
860         0x91a4, 0xffffffff, 0x00000009,
861         0x91a8, 0xffffffff, 0x00080007,
862         0x91ac, 0xffffffff, 0x000b000a,
863         0x91b0, 0xffffffff, 0x00050004,
864         0x91b4, 0xffffffff, 0x00070006,
865         0x91b8, 0xffffffff, 0x0008000b,
866         0x91bc, 0xffffffff, 0x000a0009,
867         0x91c0, 0xffffffff, 0x000d000c,
868         0x9200, 0xffffffff, 0x00090008,
869         0x9204, 0xffffffff, 0x000b000a,
870         0x9208, 0xffffffff, 0x000c000f,
871         0x920c, 0xffffffff, 0x000e000d,
872         0x9210, 0xffffffff, 0x00110010,
873         0x9214, 0xffffffff, 0x000a0009,
874         0x9218, 0xffffffff, 0x000c000b,
875         0x921c, 0xffffffff, 0x0000000f,
876         0x9220, 0xffffffff, 0x000e000d,
877         0x9224, 0xffffffff, 0x00110010,
878         0x9228, 0xffffffff, 0x000b000a,
879         0x922c, 0xffffffff, 0x000d000c,
880         0x9230, 0xffffffff, 0x00000010,
881         0x9234, 0xffffffff, 0x000f000e,
882         0x9238, 0xffffffff, 0x00120011,
883         0x923c, 0xffffffff, 0x000c000b,
884         0x9240, 0xffffffff, 0x000e000d,
885         0x9244, 0xffffffff, 0x00000011,
886         0x9248, 0xffffffff, 0x0010000f,
887         0x924c, 0xffffffff, 0x00130012,
888         0x9250, 0xffffffff, 0x000d000c,
889         0x9254, 0xffffffff, 0x000f000e,
890         0x9258, 0xffffffff, 0x00100013,
891         0x925c, 0xffffffff, 0x00120011,
892         0x9260, 0xffffffff, 0x00150014,
893         0x9150, 0xffffffff, 0x96940200,
894         0x8708, 0xffffffff, 0x00900100,
895         0xc478, 0xffffffff, 0x00000080,
896         0xc404, 0xffffffff, 0x0020003f,
897         0x30, 0xffffffff, 0x0000001c,
898         0x34, 0x000f0000, 0x000f0000,
899         0x160c, 0xffffffff, 0x00000100,
900         0x1024, 0xffffffff, 0x00000100,
901         0x102c, 0x00000101, 0x00000000,
902         0x20a8, 0xffffffff, 0x00000104,
903         0x264c, 0x000c0000, 0x000c0000,
904         0x2648, 0x000c0000, 0x000c0000,
905         0x55e4, 0xff000fff, 0x00000100,
906         0x55e8, 0x00000001, 0x00000001,
907         0x2f50, 0x00000001, 0x00000001,
908         0x30cc, 0xc0000fff, 0x00000104,
909         0xc1e4, 0x00000001, 0x00000001,
910         0xd0c0, 0xfffffff0, 0x00000100,
911         0xd8c0, 0xfffffff0, 0x00000100
912 };
913
914 static const u32 oland_mgcg_cgcg_init[] =
915 {
916         0xc400, 0xffffffff, 0xfffffffc,
917         0x802c, 0xffffffff, 0xe0000000,
918         0x9a60, 0xffffffff, 0x00000100,
919         0x92a4, 0xffffffff, 0x00000100,
920         0xc164, 0xffffffff, 0x00000100,
921         0x9774, 0xffffffff, 0x00000100,
922         0x8984, 0xffffffff, 0x06000100,
923         0x8a18, 0xffffffff, 0x00000100,
924         0x92a0, 0xffffffff, 0x00000100,
925         0xc380, 0xffffffff, 0x00000100,
926         0x8b28, 0xffffffff, 0x00000100,
927         0x9144, 0xffffffff, 0x00000100,
928         0x8d88, 0xffffffff, 0x00000100,
929         0x8d8c, 0xffffffff, 0x00000100,
930         0x9030, 0xffffffff, 0x00000100,
931         0x9034, 0xffffffff, 0x00000100,
932         0x9038, 0xffffffff, 0x00000100,
933         0x903c, 0xffffffff, 0x00000100,
934         0xad80, 0xffffffff, 0x00000100,
935         0xac54, 0xffffffff, 0x00000100,
936         0x897c, 0xffffffff, 0x06000100,
937         0x9868, 0xffffffff, 0x00000100,
938         0x9510, 0xffffffff, 0x00000100,
939         0xaf04, 0xffffffff, 0x00000100,
940         0xae04, 0xffffffff, 0x00000100,
941         0x949c, 0xffffffff, 0x00000100,
942         0x802c, 0xffffffff, 0xe0000000,
943         0x9160, 0xffffffff, 0x00010000,
944         0x9164, 0xffffffff, 0x00030002,
945         0x9168, 0xffffffff, 0x00040007,
946         0x916c, 0xffffffff, 0x00060005,
947         0x9170, 0xffffffff, 0x00090008,
948         0x9174, 0xffffffff, 0x00020001,
949         0x9178, 0xffffffff, 0x00040003,
950         0x917c, 0xffffffff, 0x00000007,
951         0x9180, 0xffffffff, 0x00060005,
952         0x9184, 0xffffffff, 0x00090008,
953         0x9188, 0xffffffff, 0x00030002,
954         0x918c, 0xffffffff, 0x00050004,
955         0x9190, 0xffffffff, 0x00000008,
956         0x9194, 0xffffffff, 0x00070006,
957         0x9198, 0xffffffff, 0x000a0009,
958         0x919c, 0xffffffff, 0x00040003,
959         0x91a0, 0xffffffff, 0x00060005,
960         0x91a4, 0xffffffff, 0x00000009,
961         0x91a8, 0xffffffff, 0x00080007,
962         0x91ac, 0xffffffff, 0x000b000a,
963         0x91b0, 0xffffffff, 0x00050004,
964         0x91b4, 0xffffffff, 0x00070006,
965         0x91b8, 0xffffffff, 0x0008000b,
966         0x91bc, 0xffffffff, 0x000a0009,
967         0x91c0, 0xffffffff, 0x000d000c,
968         0x91c4, 0xffffffff, 0x00060005,
969         0x91c8, 0xffffffff, 0x00080007,
970         0x91cc, 0xffffffff, 0x0000000b,
971         0x91d0, 0xffffffff, 0x000a0009,
972         0x91d4, 0xffffffff, 0x000d000c,
973         0x9150, 0xffffffff, 0x96940200,
974         0x8708, 0xffffffff, 0x00900100,
975         0xc478, 0xffffffff, 0x00000080,
976         0xc404, 0xffffffff, 0x0020003f,
977         0x30, 0xffffffff, 0x0000001c,
978         0x34, 0x000f0000, 0x000f0000,
979         0x160c, 0xffffffff, 0x00000100,
980         0x1024, 0xffffffff, 0x00000100,
981         0x102c, 0x00000101, 0x00000000,
982         0x20a8, 0xffffffff, 0x00000104,
983         0x264c, 0x000c0000, 0x000c0000,
984         0x2648, 0x000c0000, 0x000c0000,
985         0x55e4, 0xff000fff, 0x00000100,
986         0x55e8, 0x00000001, 0x00000001,
987         0x2f50, 0x00000001, 0x00000001,
988         0x30cc, 0xc0000fff, 0x00000104,
989         0xc1e4, 0x00000001, 0x00000001,
990         0xd0c0, 0xfffffff0, 0x00000100,
991         0xd8c0, 0xfffffff0, 0x00000100
992 };
993
994 static const u32 hainan_mgcg_cgcg_init[] =
995 {
996         0xc400, 0xffffffff, 0xfffffffc,
997         0x802c, 0xffffffff, 0xe0000000,
998         0x9a60, 0xffffffff, 0x00000100,
999         0x92a4, 0xffffffff, 0x00000100,
1000         0xc164, 0xffffffff, 0x00000100,
1001         0x9774, 0xffffffff, 0x00000100,
1002         0x8984, 0xffffffff, 0x06000100,
1003         0x8a18, 0xffffffff, 0x00000100,
1004         0x92a0, 0xffffffff, 0x00000100,
1005         0xc380, 0xffffffff, 0x00000100,
1006         0x8b28, 0xffffffff, 0x00000100,
1007         0x9144, 0xffffffff, 0x00000100,
1008         0x8d88, 0xffffffff, 0x00000100,
1009         0x8d8c, 0xffffffff, 0x00000100,
1010         0x9030, 0xffffffff, 0x00000100,
1011         0x9034, 0xffffffff, 0x00000100,
1012         0x9038, 0xffffffff, 0x00000100,
1013         0x903c, 0xffffffff, 0x00000100,
1014         0xad80, 0xffffffff, 0x00000100,
1015         0xac54, 0xffffffff, 0x00000100,
1016         0x897c, 0xffffffff, 0x06000100,
1017         0x9868, 0xffffffff, 0x00000100,
1018         0x9510, 0xffffffff, 0x00000100,
1019         0xaf04, 0xffffffff, 0x00000100,
1020         0xae04, 0xffffffff, 0x00000100,
1021         0x949c, 0xffffffff, 0x00000100,
1022         0x802c, 0xffffffff, 0xe0000000,
1023         0x9160, 0xffffffff, 0x00010000,
1024         0x9164, 0xffffffff, 0x00030002,
1025         0x9168, 0xffffffff, 0x00040007,
1026         0x916c, 0xffffffff, 0x00060005,
1027         0x9170, 0xffffffff, 0x00090008,
1028         0x9174, 0xffffffff, 0x00020001,
1029         0x9178, 0xffffffff, 0x00040003,
1030         0x917c, 0xffffffff, 0x00000007,
1031         0x9180, 0xffffffff, 0x00060005,
1032         0x9184, 0xffffffff, 0x00090008,
1033         0x9188, 0xffffffff, 0x00030002,
1034         0x918c, 0xffffffff, 0x00050004,
1035         0x9190, 0xffffffff, 0x00000008,
1036         0x9194, 0xffffffff, 0x00070006,
1037         0x9198, 0xffffffff, 0x000a0009,
1038         0x919c, 0xffffffff, 0x00040003,
1039         0x91a0, 0xffffffff, 0x00060005,
1040         0x91a4, 0xffffffff, 0x00000009,
1041         0x91a8, 0xffffffff, 0x00080007,
1042         0x91ac, 0xffffffff, 0x000b000a,
1043         0x91b0, 0xffffffff, 0x00050004,
1044         0x91b4, 0xffffffff, 0x00070006,
1045         0x91b8, 0xffffffff, 0x0008000b,
1046         0x91bc, 0xffffffff, 0x000a0009,
1047         0x91c0, 0xffffffff, 0x000d000c,
1048         0x91c4, 0xffffffff, 0x00060005,
1049         0x91c8, 0xffffffff, 0x00080007,
1050         0x91cc, 0xffffffff, 0x0000000b,
1051         0x91d0, 0xffffffff, 0x000a0009,
1052         0x91d4, 0xffffffff, 0x000d000c,
1053         0x9150, 0xffffffff, 0x96940200,
1054         0x8708, 0xffffffff, 0x00900100,
1055         0xc478, 0xffffffff, 0x00000080,
1056         0xc404, 0xffffffff, 0x0020003f,
1057         0x30, 0xffffffff, 0x0000001c,
1058         0x34, 0x000f0000, 0x000f0000,
1059         0x160c, 0xffffffff, 0x00000100,
1060         0x1024, 0xffffffff, 0x00000100,
1061         0x20a8, 0xffffffff, 0x00000104,
1062         0x264c, 0x000c0000, 0x000c0000,
1063         0x2648, 0x000c0000, 0x000c0000,
1064         0x2f50, 0x00000001, 0x00000001,
1065         0x30cc, 0xc0000fff, 0x00000104,
1066         0xc1e4, 0x00000001, 0x00000001,
1067         0xd0c0, 0xfffffff0, 0x00000100,
1068         0xd8c0, 0xfffffff0, 0x00000100
1069 };
1070
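/*
 * Power gating init sequence for Verde, also applied via
 * radeon_program_register_sequence().  The repeated 0x3538/0x353c and
 * 0x3500/0x3504 writes look like index/data style accesses used to load the
 * PG tables; the exact meaning is not documented here.
 */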
1071 static const u32 verde_pg_init[] =
1072 {
1073         0x353c, 0xffffffff, 0x40000,
1074         0x3538, 0xffffffff, 0x200010ff,
1075         0x353c, 0xffffffff, 0x0,
1076         0x353c, 0xffffffff, 0x0,
1077         0x353c, 0xffffffff, 0x0,
1078         0x353c, 0xffffffff, 0x0,
1079         0x353c, 0xffffffff, 0x0,
1080         0x353c, 0xffffffff, 0x7007,
1081         0x3538, 0xffffffff, 0x300010ff,
1082         0x353c, 0xffffffff, 0x0,
1083         0x353c, 0xffffffff, 0x0,
1084         0x353c, 0xffffffff, 0x0,
1085         0x353c, 0xffffffff, 0x0,
1086         0x353c, 0xffffffff, 0x0,
1087         0x353c, 0xffffffff, 0x400000,
1088         0x3538, 0xffffffff, 0x100010ff,
1089         0x353c, 0xffffffff, 0x0,
1090         0x353c, 0xffffffff, 0x0,
1091         0x353c, 0xffffffff, 0x0,
1092         0x353c, 0xffffffff, 0x0,
1093         0x353c, 0xffffffff, 0x0,
1094         0x353c, 0xffffffff, 0x120200,
1095         0x3538, 0xffffffff, 0x500010ff,
1096         0x353c, 0xffffffff, 0x0,
1097         0x353c, 0xffffffff, 0x0,
1098         0x353c, 0xffffffff, 0x0,
1099         0x353c, 0xffffffff, 0x0,
1100         0x353c, 0xffffffff, 0x0,
1101         0x353c, 0xffffffff, 0x1e1e16,
1102         0x3538, 0xffffffff, 0x600010ff,
1103         0x353c, 0xffffffff, 0x0,
1104         0x353c, 0xffffffff, 0x0,
1105         0x353c, 0xffffffff, 0x0,
1106         0x353c, 0xffffffff, 0x0,
1107         0x353c, 0xffffffff, 0x0,
1108         0x353c, 0xffffffff, 0x171f1e,
1109         0x3538, 0xffffffff, 0x700010ff,
1110         0x353c, 0xffffffff, 0x0,
1111         0x353c, 0xffffffff, 0x0,
1112         0x353c, 0xffffffff, 0x0,
1113         0x353c, 0xffffffff, 0x0,
1114         0x353c, 0xffffffff, 0x0,
1115         0x353c, 0xffffffff, 0x0,
1116         0x3538, 0xffffffff, 0x9ff,
1117         0x3500, 0xffffffff, 0x0,
1118         0x3504, 0xffffffff, 0x10000800,
1119         0x3504, 0xffffffff, 0xf,
1120         0x3504, 0xffffffff, 0xf,
1121         0x3500, 0xffffffff, 0x4,
1122         0x3504, 0xffffffff, 0x1000051e,
1123         0x3504, 0xffffffff, 0xffff,
1124         0x3504, 0xffffffff, 0xffff,
1125         0x3500, 0xffffffff, 0x8,
1126         0x3504, 0xffffffff, 0x80500,
1127         0x3500, 0xffffffff, 0x12,
1128         0x3504, 0xffffffff, 0x9050c,
1129         0x3500, 0xffffffff, 0x1d,
1130         0x3504, 0xffffffff, 0xb052c,
1131         0x3500, 0xffffffff, 0x2a,
1132         0x3504, 0xffffffff, 0x1053e,
1133         0x3500, 0xffffffff, 0x2d,
1134         0x3504, 0xffffffff, 0x10546,
1135         0x3500, 0xffffffff, 0x30,
1136         0x3504, 0xffffffff, 0xa054e,
1137         0x3500, 0xffffffff, 0x3c,
1138         0x3504, 0xffffffff, 0x1055f,
1139         0x3500, 0xffffffff, 0x3f,
1140         0x3504, 0xffffffff, 0x10567,
1141         0x3500, 0xffffffff, 0x42,
1142         0x3504, 0xffffffff, 0x1056f,
1143         0x3500, 0xffffffff, 0x45,
1144         0x3504, 0xffffffff, 0x10572,
1145         0x3500, 0xffffffff, 0x48,
1146         0x3504, 0xffffffff, 0x20575,
1147         0x3500, 0xffffffff, 0x4c,
1148         0x3504, 0xffffffff, 0x190801,
1149         0x3500, 0xffffffff, 0x67,
1150         0x3504, 0xffffffff, 0x1082a,
1151         0x3500, 0xffffffff, 0x6a,
1152         0x3504, 0xffffffff, 0x1b082d,
1153         0x3500, 0xffffffff, 0x87,
1154         0x3504, 0xffffffff, 0x310851,
1155         0x3500, 0xffffffff, 0xba,
1156         0x3504, 0xffffffff, 0x891,
1157         0x3500, 0xffffffff, 0xbc,
1158         0x3504, 0xffffffff, 0x893,
1159         0x3500, 0xffffffff, 0xbe,
1160         0x3504, 0xffffffff, 0x20895,
1161         0x3500, 0xffffffff, 0xc2,
1162         0x3504, 0xffffffff, 0x20899,
1163         0x3500, 0xffffffff, 0xc6,
1164         0x3504, 0xffffffff, 0x2089d,
1165         0x3500, 0xffffffff, 0xca,
1166         0x3504, 0xffffffff, 0x8a1,
1167         0x3500, 0xffffffff, 0xcc,
1168         0x3504, 0xffffffff, 0x8a3,
1169         0x3500, 0xffffffff, 0xce,
1170         0x3504, 0xffffffff, 0x308a5,
1171         0x3500, 0xffffffff, 0xd3,
1172         0x3504, 0xffffffff, 0x6d08cd,
1173         0x3500, 0xffffffff, 0x142,
1174         0x3504, 0xffffffff, 0x2000095a,
1175         0x3504, 0xffffffff, 0x1,
1176         0x3500, 0xffffffff, 0x144,
1177         0x3504, 0xffffffff, 0x301f095b,
1178         0x3500, 0xffffffff, 0x165,
1179         0x3504, 0xffffffff, 0xc094d,
1180         0x3500, 0xffffffff, 0x173,
1181         0x3504, 0xffffffff, 0xf096d,
1182         0x3500, 0xffffffff, 0x184,
1183         0x3504, 0xffffffff, 0x15097f,
1184         0x3500, 0xffffffff, 0x19b,
1185         0x3504, 0xffffffff, 0xc0998,
1186         0x3500, 0xffffffff, 0x1a9,
1187         0x3504, 0xffffffff, 0x409a7,
1188         0x3500, 0xffffffff, 0x1af,
1189         0x3504, 0xffffffff, 0xcdc,
1190         0x3500, 0xffffffff, 0x1b1,
1191         0x3504, 0xffffffff, 0x800,
1192         0x3508, 0xffffffff, 0x6c9b2000,
1193         0x3510, 0xfc00, 0x2000,
1194         0x3544, 0xffffffff, 0xfc0,
1195         0x28d4, 0x00000100, 0x100
1196 };
1197
1198 static void si_init_golden_registers(struct radeon_device *rdev)
1199 {
1200         switch (rdev->family) {
1201         case CHIP_TAHITI:
1202                 radeon_program_register_sequence(rdev,
1203                                                  tahiti_golden_registers,
1204                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
1205                 radeon_program_register_sequence(rdev,
1206                                                  tahiti_golden_rlc_registers,
1207                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1208                 radeon_program_register_sequence(rdev,
1209                                                  tahiti_mgcg_cgcg_init,
1210                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1211                 radeon_program_register_sequence(rdev,
1212                                                  tahiti_golden_registers2,
1213                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1214                 break;
1215         case CHIP_PITCAIRN:
1216                 radeon_program_register_sequence(rdev,
1217                                                  pitcairn_golden_registers,
1218                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1219                 radeon_program_register_sequence(rdev,
1220                                                  pitcairn_golden_rlc_registers,
1221                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1222                 radeon_program_register_sequence(rdev,
1223                                                  pitcairn_mgcg_cgcg_init,
1224                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1225                 break;
1226         case CHIP_VERDE:
1227                 radeon_program_register_sequence(rdev,
1228                                                  verde_golden_registers,
1229                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
1230                 radeon_program_register_sequence(rdev,
1231                                                  verde_golden_rlc_registers,
1232                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1233                 radeon_program_register_sequence(rdev,
1234                                                  verde_mgcg_cgcg_init,
1235                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1236                 radeon_program_register_sequence(rdev,
1237                                                  verde_pg_init,
1238                                                  (const u32)ARRAY_SIZE(verde_pg_init));
1239                 break;
1240         case CHIP_OLAND:
1241                 radeon_program_register_sequence(rdev,
1242                                                  oland_golden_registers,
1243                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
1244                 radeon_program_register_sequence(rdev,
1245                                                  oland_golden_rlc_registers,
1246                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1247                 radeon_program_register_sequence(rdev,
1248                                                  oland_mgcg_cgcg_init,
1249                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1250                 break;
1251         case CHIP_HAINAN:
1252                 radeon_program_register_sequence(rdev,
1253                                                  hainan_golden_registers,
1254                                                  (const u32)ARRAY_SIZE(hainan_golden_registers));
1255                 radeon_program_register_sequence(rdev,
1256                                                  hainan_golden_registers2,
1257                                                  (const u32)ARRAY_SIZE(hainan_golden_registers2));
1258                 radeon_program_register_sequence(rdev,
1259                                                  hainan_mgcg_cgcg_init,
1260                                                  (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1261                 break;
1262         default:
1263                 break;
1264         }
1265 }
1266
1267 #define PCIE_BUS_CLK                10000
1268 #define TCLK                        (PCIE_BUS_CLK / 10)
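/*
 * As with the rest of the radeon clock code, these values are in 10 kHz
 * units: PCIE_BUS_CLK corresponds to 100 MHz and TCLK to 10 MHz.
 */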
1269
1270 /**
1271  * si_get_xclk - get the xclk
1272  *
1273  * @rdev: radeon_device pointer
1274  *
1275  * Returns the reference clock used by the gfx engine
1276  * (SI).
1277  */
1278 u32 si_get_xclk(struct radeon_device *rdev)
1279 {
1280         u32 reference_clock = rdev->clock.spll.reference_freq;
1281         u32 tmp;
1282
1283         tmp = RREG32(CG_CLKPIN_CNTL_2);
1284         if (tmp & MUX_TCLK_TO_XCLK)
1285                 return TCLK;
1286
1287         tmp = RREG32(CG_CLKPIN_CNTL);
1288         if (tmp & XTALIN_DIVIDE)
1289                 return reference_clock / 4;
1290
1291         return reference_clock;
1292 }
1293
1294 /* get temperature in millidegrees; readings with bit 0x200 set are clamped to 255 C */
1295 int si_get_temp(struct radeon_device *rdev)
1296 {
1297         u32 temp;
1298         int actual_temp = 0;
1299
1300         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1301                 CTF_TEMP_SHIFT;
1302
1303         if (temp & 0x200)
1304                 actual_temp = 255;
1305         else
1306                 actual_temp = temp & 0x1ff;
1307
1308         actual_temp = (actual_temp * 1000);
1309
1310         return actual_temp;
1311 }
1312
1313 #define TAHITI_IO_MC_REGS_SIZE 36
1314
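/*
 * MC "io" register tables: {index, data} pairs that the MC microcode loader
 * (si_mc_load_microcode(), later in this file) writes through the
 * MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA interface alongside the MC
 * ucode upload.  One table per ASIC; only a few entries differ per board.
 */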
1315 static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1316         {0x0000006f, 0x03044000},
1317         {0x00000070, 0x0480c018},
1318         {0x00000071, 0x00000040},
1319         {0x00000072, 0x01000000},
1320         {0x00000074, 0x000000ff},
1321         {0x00000075, 0x00143400},
1322         {0x00000076, 0x08ec0800},
1323         {0x00000077, 0x040000cc},
1324         {0x00000079, 0x00000000},
1325         {0x0000007a, 0x21000409},
1326         {0x0000007c, 0x00000000},
1327         {0x0000007d, 0xe8000000},
1328         {0x0000007e, 0x044408a8},
1329         {0x0000007f, 0x00000003},
1330         {0x00000080, 0x00000000},
1331         {0x00000081, 0x01000000},
1332         {0x00000082, 0x02000000},
1333         {0x00000083, 0x00000000},
1334         {0x00000084, 0xe3f3e4f4},
1335         {0x00000085, 0x00052024},
1336         {0x00000087, 0x00000000},
1337         {0x00000088, 0x66036603},
1338         {0x00000089, 0x01000000},
1339         {0x0000008b, 0x1c0a0000},
1340         {0x0000008c, 0xff010000},
1341         {0x0000008e, 0xffffefff},
1342         {0x0000008f, 0xfff3efff},
1343         {0x00000090, 0xfff3efbf},
1344         {0x00000094, 0x00101101},
1345         {0x00000095, 0x00000fff},
1346         {0x00000096, 0x00116fff},
1347         {0x00000097, 0x60010000},
1348         {0x00000098, 0x10010000},
1349         {0x00000099, 0x00006000},
1350         {0x0000009a, 0x00001000},
1351         {0x0000009f, 0x00a77400}
1352 };
1353
1354 static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1355         {0x0000006f, 0x03044000},
1356         {0x00000070, 0x0480c018},
1357         {0x00000071, 0x00000040},
1358         {0x00000072, 0x01000000},
1359         {0x00000074, 0x000000ff},
1360         {0x00000075, 0x00143400},
1361         {0x00000076, 0x08ec0800},
1362         {0x00000077, 0x040000cc},
1363         {0x00000079, 0x00000000},
1364         {0x0000007a, 0x21000409},
1365         {0x0000007c, 0x00000000},
1366         {0x0000007d, 0xe8000000},
1367         {0x0000007e, 0x044408a8},
1368         {0x0000007f, 0x00000003},
1369         {0x00000080, 0x00000000},
1370         {0x00000081, 0x01000000},
1371         {0x00000082, 0x02000000},
1372         {0x00000083, 0x00000000},
1373         {0x00000084, 0xe3f3e4f4},
1374         {0x00000085, 0x00052024},
1375         {0x00000087, 0x00000000},
1376         {0x00000088, 0x66036603},
1377         {0x00000089, 0x01000000},
1378         {0x0000008b, 0x1c0a0000},
1379         {0x0000008c, 0xff010000},
1380         {0x0000008e, 0xffffefff},
1381         {0x0000008f, 0xfff3efff},
1382         {0x00000090, 0xfff3efbf},
1383         {0x00000094, 0x00101101},
1384         {0x00000095, 0x00000fff},
1385         {0x00000096, 0x00116fff},
1386         {0x00000097, 0x60010000},
1387         {0x00000098, 0x10010000},
1388         {0x00000099, 0x00006000},
1389         {0x0000009a, 0x00001000},
1390         {0x0000009f, 0x00a47400}
1391 };
1392
1393 static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1394         {0x0000006f, 0x03044000},
1395         {0x00000070, 0x0480c018},
1396         {0x00000071, 0x00000040},
1397         {0x00000072, 0x01000000},
1398         {0x00000074, 0x000000ff},
1399         {0x00000075, 0x00143400},
1400         {0x00000076, 0x08ec0800},
1401         {0x00000077, 0x040000cc},
1402         {0x00000079, 0x00000000},
1403         {0x0000007a, 0x21000409},
1404         {0x0000007c, 0x00000000},
1405         {0x0000007d, 0xe8000000},
1406         {0x0000007e, 0x044408a8},
1407         {0x0000007f, 0x00000003},
1408         {0x00000080, 0x00000000},
1409         {0x00000081, 0x01000000},
1410         {0x00000082, 0x02000000},
1411         {0x00000083, 0x00000000},
1412         {0x00000084, 0xe3f3e4f4},
1413         {0x00000085, 0x00052024},
1414         {0x00000087, 0x00000000},
1415         {0x00000088, 0x66036603},
1416         {0x00000089, 0x01000000},
1417         {0x0000008b, 0x1c0a0000},
1418         {0x0000008c, 0xff010000},
1419         {0x0000008e, 0xffffefff},
1420         {0x0000008f, 0xfff3efff},
1421         {0x00000090, 0xfff3efbf},
1422         {0x00000094, 0x00101101},
1423         {0x00000095, 0x00000fff},
1424         {0x00000096, 0x00116fff},
1425         {0x00000097, 0x60010000},
1426         {0x00000098, 0x10010000},
1427         {0x00000099, 0x00006000},
1428         {0x0000009a, 0x00001000},
1429         {0x0000009f, 0x00a37400}
1430 };
1431
1432 static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1433         {0x0000006f, 0x03044000},
1434         {0x00000070, 0x0480c018},
1435         {0x00000071, 0x00000040},
1436         {0x00000072, 0x01000000},
1437         {0x00000074, 0x000000ff},
1438         {0x00000075, 0x00143400},
1439         {0x00000076, 0x08ec0800},
1440         {0x00000077, 0x040000cc},
1441         {0x00000079, 0x00000000},
1442         {0x0000007a, 0x21000409},
1443         {0x0000007c, 0x00000000},
1444         {0x0000007d, 0xe8000000},
1445         {0x0000007e, 0x044408a8},
1446         {0x0000007f, 0x00000003},
1447         {0x00000080, 0x00000000},
1448         {0x00000081, 0x01000000},
1449         {0x00000082, 0x02000000},
1450         {0x00000083, 0x00000000},
1451         {0x00000084, 0xe3f3e4f4},
1452         {0x00000085, 0x00052024},
1453         {0x00000087, 0x00000000},
1454         {0x00000088, 0x66036603},
1455         {0x00000089, 0x01000000},
1456         {0x0000008b, 0x1c0a0000},
1457         {0x0000008c, 0xff010000},
1458         {0x0000008e, 0xffffefff},
1459         {0x0000008f, 0xfff3efff},
1460         {0x00000090, 0xfff3efbf},
1461         {0x00000094, 0x00101101},
1462         {0x00000095, 0x00000fff},
1463         {0x00000096, 0x00116fff},
1464         {0x00000097, 0x60010000},
1465         {0x00000098, 0x10010000},
1466         {0x00000099, 0x00006000},
1467         {0x0000009a, 0x00001000},
1468         {0x0000009f, 0x00a17730}
1469 };
1470
1471 static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1472         {0x0000006f, 0x03044000},
1473         {0x00000070, 0x0480c018},
1474         {0x00000071, 0x00000040},
1475         {0x00000072, 0x01000000},
1476         {0x00000074, 0x000000ff},
1477         {0x00000075, 0x00143400},
1478         {0x00000076, 0x08ec0800},
1479         {0x00000077, 0x040000cc},
1480         {0x00000079, 0x00000000},
1481         {0x0000007a, 0x21000409},
1482         {0x0000007c, 0x00000000},
1483         {0x0000007d, 0xe8000000},
1484         {0x0000007e, 0x044408a8},
1485         {0x0000007f, 0x00000003},
1486         {0x00000080, 0x00000000},
1487         {0x00000081, 0x01000000},
1488         {0x00000082, 0x02000000},
1489         {0x00000083, 0x00000000},
1490         {0x00000084, 0xe3f3e4f4},
1491         {0x00000085, 0x00052024},
1492         {0x00000087, 0x00000000},
1493         {0x00000088, 0x66036603},
1494         {0x00000089, 0x01000000},
1495         {0x0000008b, 0x1c0a0000},
1496         {0x0000008c, 0xff010000},
1497         {0x0000008e, 0xffffefff},
1498         {0x0000008f, 0xfff3efff},
1499         {0x00000090, 0xfff3efbf},
1500         {0x00000094, 0x00101101},
1501         {0x00000095, 0x00000fff},
1502         {0x00000096, 0x00116fff},
1503         {0x00000097, 0x60010000},
1504         {0x00000098, 0x10010000},
1505         {0x00000099, 0x00006000},
1506         {0x0000009a, 0x00001000},
1507         {0x0000009f, 0x00a07730}
1508 };
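/*
 * The tables above are {MC_SEQ_IO_DEBUG index, data} pairs; the per-ASIC
 * differences are limited to a few entries (e.g. the 0x9f value).
 * si_mc_load_microcode() below walks the selected table, writing each
 * index to MC_SEQ_IO_DEBUG_INDEX and each value to MC_SEQ_IO_DEBUG_DATA
 * before the MC ucode itself is programmed.
 */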
1509
1510 /* ucode loading */
1511 int si_mc_load_microcode(struct radeon_device *rdev)
1512 {
1513         const __be32 *fw_data = NULL;
1514         const __le32 *new_fw_data = NULL;
1515         u32 running;
1516         u32 *io_mc_regs = NULL;
1517         const __le32 *new_io_mc_regs = NULL;
1518         int i, regs_size, ucode_size;
1519
1520         if (!rdev->mc_fw)
1521                 return -EINVAL;
1522
1523         if (rdev->new_fw) {
1524                 const struct mc_firmware_header_v1_0 *hdr =
1525                         (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1526
1527                 radeon_ucode_print_mc_hdr(&hdr->header);
1528                 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1529                 new_io_mc_regs = (const __le32 *)
1530                         (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1531                 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1532                 new_fw_data = (const __le32 *)
1533                         (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1534         } else {
1535                 ucode_size = rdev->mc_fw->size / 4;
1536
1537                 switch (rdev->family) {
1538                 case CHIP_TAHITI:
1539                         io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1540                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1541                         break;
1542                 case CHIP_PITCAIRN:
1543                         io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1544                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1545                         break;
1546                 case CHIP_VERDE:
1547                 default:
1548                         io_mc_regs = (u32 *)&verde_io_mc_regs;
1549                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1550                         break;
1551                 case CHIP_OLAND:
1552                         io_mc_regs = (u32 *)&oland_io_mc_regs;
1553                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1554                         break;
1555                 case CHIP_HAINAN:
1556                         io_mc_regs = (u32 *)&hainan_io_mc_regs;
1557                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1558                         break;
1559                 }
1560                 fw_data = (const __be32 *)rdev->mc_fw->data;
1561         }
1562
1563         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1564
1565         if (running == 0) {
1571                 /* reset the engine and set to writable */
1572                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1573                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1574
1575                 /* load mc io regs */
1576                 for (i = 0; i < regs_size; i++) {
1577                         if (rdev->new_fw) {
1578                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1579                                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1580                         } else {
1581                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1582                                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1583                         }
1584                 }
1585                 /* load the MC ucode */
1586                 for (i = 0; i < ucode_size; i++) {
1587                         if (rdev->new_fw)
1588                                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1589                         else
1590                                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1591                 }
1592
1593                 /* put the engine back into the active state */
1594                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1595                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1596                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1597
1598                 /* wait for training to complete */
1599                 for (i = 0; i < rdev->usec_timeout; i++) {
1600                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1601                                 break;
1602                         udelay(1);
1603                 }
1604                 for (i = 0; i < rdev->usec_timeout; i++) {
1605                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1606                                 break;
1607                         udelay(1);
1608                 }
1609
1612         }
1613
1614         return 0;
1615 }
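/*
 * Sizing note (illustrative): with the new firmware layout,
 * regs_size = io_debug_size_bytes / (4 * 2) because every table entry is
 * two 32-bit words (index + data), so a 288 byte debug array would yield
 * 36 pairs - the same count as the legacy TAHITI_IO_MC_REGS_SIZE tables.
 * ucode_size is likewise ucode_size_bytes / 4 dwords, each written to
 * MC_SEQ_SUP_PGM in turn.
 */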
1616
1617 static int si_init_microcode(struct radeon_device *rdev)
1618 {
1619         const char *chip_name;
1620         const char *new_chip_name;
1621         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1622         size_t smc_req_size, mc2_req_size;
1623         char fw_name[30];
1624         int err;
1625         int new_fw = 0;
1626
1627         DRM_DEBUG("\n");
1628
1629         switch (rdev->family) {
1630         case CHIP_TAHITI:
1631                 chip_name = "TAHITI";
1632                 new_chip_name = "tahiti";
1633                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1634                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1635                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1636                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1637                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1638                 mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1639                 smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1640                 break;
1641         case CHIP_PITCAIRN:
1642                 chip_name = "PITCAIRN";
1643                 new_chip_name = "pitcairn";
1644                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1645                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1646                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1647                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1648                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1649                 mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1650                 smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1651                 break;
1652         case CHIP_VERDE:
1653                 chip_name = "VERDE";
1654                 new_chip_name = "verde";
1655                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1656                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1657                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1658                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1659                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1660                 mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1661                 smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1662                 break;
1663         case CHIP_OLAND:
1664                 chip_name = "OLAND";
1665                 new_chip_name = "oland";
1666                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1667                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1668                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1669                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1670                 mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1671                 smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1672                 break;
1673         case CHIP_HAINAN:
1674                 chip_name = "HAINAN";
1675                 new_chip_name = "hainan";
1676                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1677                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1678                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1679                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1680                 mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1681                 smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1682                 break;
1683         default: BUG();
1684         }
1685
1686         DRM_INFO("Loading %s Microcode\n", new_chip_name);
1687
1688         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1689         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1690         if (err) {
1691                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1692                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1693                 if (err)
1694                         goto out;
1695                 if (rdev->pfp_fw->size != pfp_req_size) {
1696                         printk(KERN_ERR
1697                                "si_cp: Bogus length %zu in firmware \"%s\"\n",
1698                                rdev->pfp_fw->size, fw_name);
1699                         err = -EINVAL;
1700                         goto out;
1701                 }
1702         } else {
1703                 err = radeon_ucode_validate(rdev->pfp_fw);
1704                 if (err) {
1705                         printk(KERN_ERR
1706                                "si_cp: validation failed for firmware \"%s\"\n",
1707                                fw_name);
1708                         goto out;
1709                 } else {
1710                         new_fw++;
1711                 }
1712         }
1713
1714         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
1715         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1716         if (err) {
1717                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1718                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1719                 if (err)
1720                         goto out;
1721                 if (rdev->me_fw->size != me_req_size) {
1722                         printk(KERN_ERR
1723                                "si_cp: Bogus length %zu in firmware \"%s\"\n",
1724                                rdev->me_fw->size, fw_name);
1725                         err = -EINVAL;
1726                 }
1727         } else {
1728                 err = radeon_ucode_validate(rdev->me_fw);
1729                 if (err) {
1730                         printk(KERN_ERR
1731                                "si_cp: validation failed for firmware \"%s\"\n",
1732                                fw_name);
1733                         goto out;
1734                 } else {
1735                         new_fw++;
1736                 }
1737         }
1738
1739         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
1740         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1741         if (err) {
1742                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1743                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1744                 if (err)
1745                         goto out;
1746                 if (rdev->ce_fw->size != ce_req_size) {
1747                         printk(KERN_ERR
1748                                "si_cp: Bogus length %zu in firmware \"%s\"\n",
1749                                rdev->ce_fw->size, fw_name);
1750                         err = -EINVAL;
1751                 }
1752         } else {
1753                 err = radeon_ucode_validate(rdev->ce_fw);
1754                 if (err) {
1755                         printk(KERN_ERR
1756                                "si_cp: validation failed for firmware \"%s\"\n",
1757                                fw_name);
1758                         goto out;
1759                 } else {
1760                         new_fw++;
1761                 }
1762         }
1763
1764         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
1765         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1766         if (err) {
1767                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1768                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1769                 if (err)
1770                         goto out;
1771                 if (rdev->rlc_fw->size != rlc_req_size) {
1772                         printk(KERN_ERR
1773                                "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1774                                rdev->rlc_fw->size, fw_name);
1775                         err = -EINVAL;
1776                 }
1777         } else {
1778                 err = radeon_ucode_validate(rdev->rlc_fw);
1779                 if (err) {
1780                         printk(KERN_ERR
1781                                "si_cp: validation failed for firmware \"%s\"\n",
1782                                fw_name);
1783                         goto out;
1784                 } else {
1785                         new_fw++;
1786                 }
1787         }
1788
1789         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
1790         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1791         if (err) {
1792                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1793                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1794                 if (err) {
1795                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1796                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1797                         if (err)
1798                                 goto out;
1799                 }
1800                 if ((rdev->mc_fw->size != mc_req_size) &&
1801                     (rdev->mc_fw->size != mc2_req_size)) {
1802                         printk(KERN_ERR
1803                                "si_mc: Bogus length %zu in firmware \"%s\"\n",
1804                                rdev->mc_fw->size, fw_name);
1805                         err = -EINVAL;
1806                 }
1807                 DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1808         } else {
1809                 err = radeon_ucode_validate(rdev->mc_fw);
1810                 if (err) {
1811                         printk(KERN_ERR
1812                                "si_cp: validation failed for firmware \"%s\"\n",
1813                                fw_name);
1814                         goto out;
1815                 } else {
1816                         new_fw++;
1817                 }
1818         }
1819
1820         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
1821         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1822         if (err) {
1823                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1824                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1825                 if (err) {
1826                         printk(KERN_ERR
1827                                "smc: error loading firmware \"%s\"\n",
1828                                fw_name);
1829                         release_firmware(rdev->smc_fw);
1830                         rdev->smc_fw = NULL;
1831                         err = 0;
1832                 } else if (rdev->smc_fw->size != smc_req_size) {
1833                         printk(KERN_ERR
1834                                "si_smc: Bogus length %zu in firmware \"%s\"\n",
1835                                rdev->smc_fw->size, fw_name);
1836                         err = -EINVAL;
1837                 }
1838         } else {
1839                 err = radeon_ucode_validate(rdev->smc_fw);
1840                 if (err) {
1841                         printk(KERN_ERR
1842                                "si_cp: validation failed for firmware \"%s\"\n",
1843                                fw_name);
1844                         goto out;
1845                 } else {
1846                         new_fw++;
1847                 }
1848         }
1849
1850         if (new_fw == 0) {
1851                 rdev->new_fw = false;
1852         } else if (new_fw < 6) {
1853                 printk(KERN_ERR "si_fw: mixing new and old firmware!\n");
1854                 err = -EINVAL;
1855         } else {
1856                 rdev->new_fw = true;
1857         }
1858 out:
1859         if (err) {
1860                 if (err != -EINVAL)
1861                         printk(KERN_ERR
1862                                "si_cp: Failed to load firmware \"%s\"\n",
1863                                fw_name);
1864                 release_firmware(rdev->pfp_fw);
1865                 rdev->pfp_fw = NULL;
1866                 release_firmware(rdev->me_fw);
1867                 rdev->me_fw = NULL;
1868                 release_firmware(rdev->ce_fw);
1869                 rdev->ce_fw = NULL;
1870                 release_firmware(rdev->rlc_fw);
1871                 rdev->rlc_fw = NULL;
1872                 release_firmware(rdev->mc_fw);
1873                 rdev->mc_fw = NULL;
1874                 release_firmware(rdev->smc_fw);
1875                 rdev->smc_fw = NULL;
1876         }
1877         return err;
1878 }
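/*
 * Summary of the fallback scheme above: the new lower-case images (which
 * carry radeon_ucode headers and are checked with radeon_ucode_validate())
 * are tried first, falling back to the legacy upper-case images whose only
 * sanity check is a hard-coded size.  Six images are requested (pfp, me,
 * ce, rlc, mc, smc), so new_fw must end up at 0 (all legacy) or 6 (all
 * new); anything in between indicates a mixed install and is rejected
 * with -EINVAL.
 */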
1879
1880 /* watermark setup */
1881 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1882                                    struct radeon_crtc *radeon_crtc,
1883                                    struct drm_display_mode *mode,
1884                                    struct drm_display_mode *other_mode)
1885 {
1886         u32 tmp, buffer_alloc, i;
1887         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1888         /*
1889          * Line Buffer Setup
1890          * There are 3 line buffers, each one shared by 2 display controllers.
1891          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1892          * the display controllers.  The partitioning is done via one of four
1893          * preset allocations specified in bits 21:20 (only two are used here):
1894          *  0 - half lb
1895          *  2 - whole lb, other crtc must be disabled
1896          */
1897         /* this can get tricky if we have two large displays on a paired group
1898          * of crtcs.  Ideally for multiple large displays we'd assign them to
1899          * non-linked crtcs for maximum line buffer allocation.
1900          */
1901         if (radeon_crtc->base.enabled && mode) {
1902                 if (other_mode) {
1903                         tmp = 0; /* 1/2 */
1904                         buffer_alloc = 1;
1905                 } else {
1906                         tmp = 2; /* whole */
1907                         buffer_alloc = 2;
1908                 }
1909         } else {
1910                 tmp = 0;
1911                 buffer_alloc = 0;
1912         }
1913
1914         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1915                DC_LB_MEMORY_CONFIG(tmp));
1916
1917         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1918                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1919         for (i = 0; i < rdev->usec_timeout; i++) {
1920                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1921                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
1922                         break;
1923                 udelay(1);
1924         }
1925
1926         if (radeon_crtc->base.enabled && mode) {
1927                 switch (tmp) {
1928                 case 0:
1929                 default:
1930                         return 4096 * 2;
1931                 case 2:
1932                         return 8192 * 2;
1933                 }
1934         }
1935
1936         /* controller not enabled, so no lb used */
1937         return 0;
1938 }
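/*
 * Illustrative outcome of the allocation above: a CRTC sharing its line
 * buffer with an active partner gets the 1/2 split and reports
 * 4096 * 2 = 8192, a CRTC whose partner is disabled gets the whole buffer
 * and reports 8192 * 2 = 16384, and a disabled CRTC reports 0 since it
 * uses no line buffer at all.
 */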
1939
1940 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1941 {
1942         u32 tmp = RREG32(MC_SHARED_CHMAP);
1943
1944         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1945         case 0:
1946         default:
1947                 return 1;
1948         case 1:
1949                 return 2;
1950         case 2:
1951                 return 4;
1952         case 3:
1953                 return 8;
1954         case 4:
1955                 return 3;
1956         case 5:
1957                 return 6;
1958         case 6:
1959                 return 10;
1960         case 7:
1961                 return 12;
1962         case 8:
1963                 return 16;
1964         }
1965 }
1966
1967 struct dce6_wm_params {
1968         u32 dram_channels; /* number of dram channels */
1969         u32 yclk;          /* bandwidth per dram data pin in kHz */
1970         u32 sclk;          /* engine clock in kHz */
1971         u32 disp_clk;      /* display clock in kHz */
1972         u32 src_width;     /* viewport width */
1973         u32 active_time;   /* active display time in ns */
1974         u32 blank_time;    /* blank time in ns */
1975         bool interlaced;    /* mode is interlaced */
1976         fixed20_12 vsc;    /* vertical scale ratio */
1977         u32 num_heads;     /* number of active crtcs */
1978         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
1979         u32 lb_size;       /* line buffer allocated to pipe */
1980         u32 vtaps;         /* vertical scaler taps */
1981 };
1982
1983 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1984 {
1985         /* Calculate raw DRAM Bandwidth */
1986         fixed20_12 dram_efficiency; /* 0.7 */
1987         fixed20_12 yclk, dram_channels, bandwidth;
1988         fixed20_12 a;
1989
1990         a.full = dfixed_const(1000);
1991         yclk.full = dfixed_const(wm->yclk);
1992         yclk.full = dfixed_div(yclk, a);
1993         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1994         a.full = dfixed_const(10);
1995         dram_efficiency.full = dfixed_const(7);
1996         dram_efficiency.full = dfixed_div(dram_efficiency, a);
1997         bandwidth.full = dfixed_mul(dram_channels, yclk);
1998         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1999
2000         return dfixed_trunc(bandwidth);
2001 }
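/*
 * Worked example (illustrative numbers): with 8 DRAM channels and
 * wm->yclk = 2,000,000 kHz the fixed-point chain above computes
 * (8 * 4) * (2,000,000 / 1000) * 0.7 = 44,800, i.e. a 32-byte wide
 * interface at the per-pin rate derated to 70% efficiency.
 */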
2002
2003 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2004 {
2005         /* Calculate DRAM Bandwidth and the part allocated to display. */
2006         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2007         fixed20_12 yclk, dram_channels, bandwidth;
2008         fixed20_12 a;
2009
2010         a.full = dfixed_const(1000);
2011         yclk.full = dfixed_const(wm->yclk);
2012         yclk.full = dfixed_div(yclk, a);
2013         dram_channels.full = dfixed_const(wm->dram_channels * 4);
2014         a.full = dfixed_const(10);
2015         disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
2016         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2017         bandwidth.full = dfixed_mul(dram_channels, yclk);
2018         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2019
2020         return dfixed_trunc(bandwidth);
2021 }
2022
2023 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2024 {
2025         /* Calculate the display Data return Bandwidth */
2026         fixed20_12 return_efficiency; /* 0.8 */
2027         fixed20_12 sclk, bandwidth;
2028         fixed20_12 a;
2029
2030         a.full = dfixed_const(1000);
2031         sclk.full = dfixed_const(wm->sclk);
2032         sclk.full = dfixed_div(sclk, a);
2033         a.full = dfixed_const(10);
2034         return_efficiency.full = dfixed_const(8);
2035         return_efficiency.full = dfixed_div(return_efficiency, a);
2036         a.full = dfixed_const(32);
2037         bandwidth.full = dfixed_mul(a, sclk);
2038         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2039
2040         return dfixed_trunc(bandwidth);
2041 }
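/*
 * Worked example (illustrative numbers): wm->sclk = 800,000 kHz gives
 * 32 * (800,000 / 1000) * 0.8 = 20,480, i.e. 32 bytes per engine clock
 * derated to the assumed 80% return efficiency.
 */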
2042
2043 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2044 {
2045         return 32;
2046 }
2047
2048 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2049 {
2050         /* Calculate the DMIF Request Bandwidth */
2051         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2052         fixed20_12 disp_clk, sclk, bandwidth;
2053         fixed20_12 a, b1, b2;
2054         u32 min_bandwidth;
2055
2056         a.full = dfixed_const(1000);
2057         disp_clk.full = dfixed_const(wm->disp_clk);
2058         disp_clk.full = dfixed_div(disp_clk, a);
2059         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2060         b1.full = dfixed_mul(a, disp_clk);
2061
2062         a.full = dfixed_const(1000);
2063         sclk.full = dfixed_const(wm->sclk);
2064         sclk.full = dfixed_div(sclk, a);
2065         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2066         b2.full = dfixed_mul(a, sclk);
2067
2068         a.full = dfixed_const(10);
2069         disp_clk_request_efficiency.full = dfixed_const(8);
2070         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2071
2072         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2073
2074         a.full = dfixed_const(min_bandwidth);
2075         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2076
2077         return dfixed_trunc(bandwidth);
2078 }
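/*
 * Worked example (illustrative numbers): with 32 byte DMIF requests,
 * wm->disp_clk = 300,000 kHz and wm->sclk = 800,000 kHz give
 * b1 = 16 * 300 = 4800 and b2 = 32 * 800 = 25,600; the smaller value
 * (4800) derated to 80% efficiency yields 3840.
 */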
2079
2080 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2081 {
2082         /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
2083         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2084         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2085         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2086
2087         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2088 }
2089
2090 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2091 {
2092         /* Calculate the display mode Average Bandwidth
2093          * DisplayMode should contain the source and destination dimensions,
2094          * timing, etc.
2095          */
2096         fixed20_12 bpp;
2097         fixed20_12 line_time;
2098         fixed20_12 src_width;
2099         fixed20_12 bandwidth;
2100         fixed20_12 a;
2101
2102         a.full = dfixed_const(1000);
2103         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2104         line_time.full = dfixed_div(line_time, a);
2105         bpp.full = dfixed_const(wm->bytes_per_pixel);
2106         src_width.full = dfixed_const(wm->src_width);
2107         bandwidth.full = dfixed_mul(src_width, bpp);
2108         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2109         bandwidth.full = dfixed_div(bandwidth, line_time);
2110
2111         return dfixed_trunc(bandwidth);
2112 }
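/*
 * Worked example (illustrative 1080p-style numbers): src_width = 1920,
 * 4 bytes per pixel, vsc = 1.0 and a total line time of ~14,815 ns give
 * roughly (1920 * 4) / 14.8 = ~518, i.e. the average rate one head
 * demands from memory over a scanline.
 */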
2113
2114 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2115 {
2116         /* First calculate the latency in ns */
2117         u32 mc_latency = 2000; /* 2000 ns. */
2118         u32 available_bandwidth = dce6_available_bandwidth(wm);
2119         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2120         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2121         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2122         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2123                 (wm->num_heads * cursor_line_pair_return_time);
2124         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2125         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2126         u32 tmp, dmif_size = 12288;
2127         fixed20_12 a, b, c;
2128
2129         if (wm->num_heads == 0)
2130                 return 0;
2131
2132         a.full = dfixed_const(2);
2133         b.full = dfixed_const(1);
2134         if ((wm->vsc.full > a.full) ||
2135             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2136             (wm->vtaps >= 5) ||
2137             ((wm->vsc.full >= a.full) && wm->interlaced))
2138                 max_src_lines_per_dst_line = 4;
2139         else
2140                 max_src_lines_per_dst_line = 2;
2141
2142         a.full = dfixed_const(available_bandwidth);
2143         b.full = dfixed_const(wm->num_heads);
2144         a.full = dfixed_div(a, b);
2145
2146         b.full = dfixed_const(mc_latency + 512);
2147         c.full = dfixed_const(wm->disp_clk);
2148         b.full = dfixed_div(b, c);
2149
2150         c.full = dfixed_const(dmif_size);
2151         b.full = dfixed_div(c, b);
2152
2153         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
2154
2155         b.full = dfixed_const(1000);
2156         c.full = dfixed_const(wm->disp_clk);
2157         b.full = dfixed_div(c, b);
2158         c.full = dfixed_const(wm->bytes_per_pixel);
2159         b.full = dfixed_mul(b, c);
2160
2161         lb_fill_bw = min(tmp, dfixed_trunc(b));
2162
2163         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2164         b.full = dfixed_const(1000);
2165         c.full = dfixed_const(lb_fill_bw);
2166         b.full = dfixed_div(c, b);
2167         a.full = dfixed_div(a, b);
2168         line_fill_time = dfixed_trunc(a);
2169
2170         if (line_fill_time < wm->active_time)
2171                 return latency;
2172         else
2173                 return latency + (line_fill_time - wm->active_time);
2174
2175 }
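/*
 * Reading the result above: the watermark starts from the fixed 2000 ns
 * MC latency, adds a display-clock dependent pipe latency
 * (40,000,000 / wm->disp_clk, e.g. ~133 ns at 300,000 kHz) and the time
 * the other heads can hold the return path, and is then stretched by
 * however much the line buffer fill time exceeds the active display time.
 */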
2176
2177 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2178 {
2179         if (dce6_average_bandwidth(wm) <=
2180             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2181                 return true;
2182         else
2183                 return false;
2184 }
2185
2186 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2187 {
2188         if (dce6_average_bandwidth(wm) <=
2189             (dce6_available_bandwidth(wm) / wm->num_heads))
2190                 return true;
2191         else
2192                 return false;
2193 }
2194
2195 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2196 {
2197         u32 lb_partitions = wm->lb_size / wm->src_width;
2198         u32 line_time = wm->active_time + wm->blank_time;
2199         u32 latency_tolerant_lines;
2200         u32 latency_hiding;
2201         fixed20_12 a;
2202
2203         a.full = dfixed_const(1);
2204         if (wm->vsc.full > a.full)
2205                 latency_tolerant_lines = 1;
2206         else {
2207                 if (lb_partitions <= (wm->vtaps + 1))
2208                         latency_tolerant_lines = 1;
2209                 else
2210                         latency_tolerant_lines = 2;
2211         }
2212
2213         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2214
2215         if (dce6_latency_watermark(wm) <= latency_hiding)
2216                 return true;
2217         else
2218                 return false;
2219 }
2220
2221 static void dce6_program_watermarks(struct radeon_device *rdev,
2222                                          struct radeon_crtc *radeon_crtc,
2223                                          u32 lb_size, u32 num_heads)
2224 {
2225         struct drm_display_mode *mode = &radeon_crtc->base.mode;
2226         struct dce6_wm_params wm_low, wm_high;
2227         u32 dram_channels;
2228         u32 pixel_period;
2229         u32 line_time = 0;
2230         u32 latency_watermark_a = 0, latency_watermark_b = 0;
2231         u32 priority_a_mark = 0, priority_b_mark = 0;
2232         u32 priority_a_cnt = PRIORITY_OFF;
2233         u32 priority_b_cnt = PRIORITY_OFF;
2234         u32 tmp, arb_control3;
2235         fixed20_12 a, b, c;
2236
2237         if (radeon_crtc->base.enabled && num_heads && mode) {
2238                 pixel_period = 1000000 / (u32)mode->clock;
2239                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
2240                 priority_a_cnt = 0;
2241                 priority_b_cnt = 0;
2242
2243                 if (rdev->family == CHIP_ARUBA)
2244                         dram_channels = evergreen_get_number_of_dram_channels(rdev);
2245                 else
2246                         dram_channels = si_get_number_of_dram_channels(rdev);
2247
2248                 /* watermark for high clocks */
2249                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2250                         wm_high.yclk =
2251                                 radeon_dpm_get_mclk(rdev, false) * 10;
2252                         wm_high.sclk =
2253                                 radeon_dpm_get_sclk(rdev, false) * 10;
2254                 } else {
2255                         wm_high.yclk = rdev->pm.current_mclk * 10;
2256                         wm_high.sclk = rdev->pm.current_sclk * 10;
2257                 }
2258
2259                 wm_high.disp_clk = mode->clock;
2260                 wm_high.src_width = mode->crtc_hdisplay;
2261                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
2262                 wm_high.blank_time = line_time - wm_high.active_time;
2263                 wm_high.interlaced = false;
2264                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2265                         wm_high.interlaced = true;
2266                 wm_high.vsc = radeon_crtc->vsc;
2267                 wm_high.vtaps = 1;
2268                 if (radeon_crtc->rmx_type != RMX_OFF)
2269                         wm_high.vtaps = 2;
2270                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2271                 wm_high.lb_size = lb_size;
2272                 wm_high.dram_channels = dram_channels;
2273                 wm_high.num_heads = num_heads;
2274
2275                 /* watermark for low clocks */
2276                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2277                         wm_low.yclk =
2278                                 radeon_dpm_get_mclk(rdev, true) * 10;
2279                         wm_low.sclk =
2280                                 radeon_dpm_get_sclk(rdev, true) * 10;
2281                 } else {
2282                         wm_low.yclk = rdev->pm.current_mclk * 10;
2283                         wm_low.sclk = rdev->pm.current_sclk * 10;
2284                 }
2285
2286                 wm_low.disp_clk = mode->clock;
2287                 wm_low.src_width = mode->crtc_hdisplay;
2288                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
2289                 wm_low.blank_time = line_time - wm_low.active_time;
2290                 wm_low.interlaced = false;
2291                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2292                         wm_low.interlaced = true;
2293                 wm_low.vsc = radeon_crtc->vsc;
2294                 wm_low.vtaps = 1;
2295                 if (radeon_crtc->rmx_type != RMX_OFF)
2296                         wm_low.vtaps = 2;
2297                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2298                 wm_low.lb_size = lb_size;
2299                 wm_low.dram_channels = dram_channels;
2300                 wm_low.num_heads = num_heads;
2301
2302                 /* set for high clocks */
2303                 latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2304                 /* set for low clocks */
2305                 latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2306
2307                 /* possibly force display priority to high */
2308                 /* should really do this at mode validation time... */
2309                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2310                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2311                     !dce6_check_latency_hiding(&wm_high) ||
2312                     (rdev->disp_priority == 2)) {
2313                         DRM_DEBUG_KMS("force priority to high\n");
2314                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2315                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2316                 }
2317                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2318                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2319                     !dce6_check_latency_hiding(&wm_low) ||
2320                     (rdev->disp_priority == 2)) {
2321                         DRM_DEBUG_KMS("force priority to high\n");
2322                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2323                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2324                 }
2325
2326                 a.full = dfixed_const(1000);
2327                 b.full = dfixed_const(mode->clock);
2328                 b.full = dfixed_div(b, a);
2329                 c.full = dfixed_const(latency_watermark_a);
2330                 c.full = dfixed_mul(c, b);
2331                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2332                 c.full = dfixed_div(c, a);
2333                 a.full = dfixed_const(16);
2334                 c.full = dfixed_div(c, a);
2335                 priority_a_mark = dfixed_trunc(c);
2336                 priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2337
2338                 a.full = dfixed_const(1000);
2339                 b.full = dfixed_const(mode->clock);
2340                 b.full = dfixed_div(b, a);
2341                 c.full = dfixed_const(latency_watermark_b);
2342                 c.full = dfixed_mul(c, b);
2343                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2344                 c.full = dfixed_div(c, a);
2345                 a.full = dfixed_const(16);
2346                 c.full = dfixed_div(c, a);
2347                 priority_b_mark = dfixed_trunc(c);
2348                 priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2349         }
2350
2351         /* select wm A */
2352         arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2353         tmp = arb_control3;
2354         tmp &= ~LATENCY_WATERMARK_MASK(3);
2355         tmp |= LATENCY_WATERMARK_MASK(1);
2356         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2357         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2358                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2359                 LATENCY_HIGH_WATERMARK(line_time)));
2360         /* select wm B */
2361         tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2362         tmp &= ~LATENCY_WATERMARK_MASK(3);
2363         tmp |= LATENCY_WATERMARK_MASK(2);
2364         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2365         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2366                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2367                 LATENCY_HIGH_WATERMARK(line_time)));
2368         /* restore original selection */
2369         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2370
2371         /* write the priority marks */
2372         WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2373         WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2374
2375         /* save values for DPM */
2376         radeon_crtc->line_time = line_time;
2377         radeon_crtc->wm_high = latency_watermark_a;
2378         radeon_crtc->wm_low = latency_watermark_b;
2379 }
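/*
 * Priority mark sanity check (illustrative numbers): a 148,500 kHz pixel
 * clock (148.5 pixels/us), hsc = 1.0 and a 3000 ns latency watermark work
 * out above to 3000 * 148.5 / 1000 = ~445 pixels, or ~27 after the final
 * divide into the 16-pixel units the priority mark appears to use.
 */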
2380
2381 void dce6_bandwidth_update(struct radeon_device *rdev)
2382 {
2383         struct drm_display_mode *mode0 = NULL;
2384         struct drm_display_mode *mode1 = NULL;
2385         u32 num_heads = 0, lb_size;
2386         int i;
2387
2388         if (!rdev->mode_info.mode_config_initialized)
2389                 return;
2390
2391         radeon_update_display_priority(rdev);
2392
2393         for (i = 0; i < rdev->num_crtc; i++) {
2394                 if (rdev->mode_info.crtcs[i]->base.enabled)
2395                         num_heads++;
2396         }
2397         for (i = 0; i < rdev->num_crtc; i += 2) {
2398                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2399                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2400                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2401                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2402                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2403                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2404         }
2405 }
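/*
 * Note on the loop above: CRTCs are walked in pairs (i, i + 1) because
 * each pair shares one line buffer; passing the partner's mode into
 * dce6_line_buffer_adjust() is what lets it pick between the half and
 * whole-buffer allocations.
 */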
2406
2407 /*
2408  * Core functions
2409  */
2410 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2411 {
2412         const u32 num_tile_mode_states = 32;
2413         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2414
2415         switch (rdev->config.si.mem_row_size_in_kb) {
2416         case 1:
2417                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2418                 break;
2419         case 2:
2420         default:
2421                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2422                 break;
2423         case 4:
2424                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2425                 break;
2426         }
2427
2428         if ((rdev->family == CHIP_TAHITI) ||
2429             (rdev->family == CHIP_PITCAIRN)) {
2430                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2431                         switch (reg_offset) {
2432                         case 0:  /* non-AA compressed depth or any compressed stencil */
2433                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2434                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2435                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2436                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2437                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2438                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2439                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2440                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2441                                 break;
2442                         case 1:  /* 2xAA/4xAA compressed depth only */
2443                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2444                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2445                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2446                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2447                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2448                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2450                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2451                                 break;
2452                         case 2:  /* 8xAA compressed depth only */
2453                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2454                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2455                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2456                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2457                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2458                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2460                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2461                                 break;
2462                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2463                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2464                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2465                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2466                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2467                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2468                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2470                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2471                                 break;
2472                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2473                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2474                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2475                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2476                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2477                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2478                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2480                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2481                                 break;
2482                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2483                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2484                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2485                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2486                                                  TILE_SPLIT(split_equal_to_row_size) |
2487                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2488                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2490                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2491                                 break;
2492                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2493                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2494                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2495                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2496                                                  TILE_SPLIT(split_equal_to_row_size) |
2497                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2498                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2499                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2500                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2501                                 break;
2502                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2503                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2504                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2505                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2506                                                  TILE_SPLIT(split_equal_to_row_size) |
2507                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2508                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2510                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2511                                 break;
2512                         case 8:  /* 1D and 1D Array Surfaces */
2513                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2514                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2515                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2516                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2517                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2518                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2519                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2520                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2521                                 break;
2522                         case 9:  /* Displayable maps. */
2523                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2524                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2525                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2526                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2527                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2528                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2529                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2530                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2531                                 break;
2532                         case 10:  /* Display 8bpp. */
2533                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2534                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2535                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2536                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2537                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2538                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2540                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2541                                 break;
2542                         case 11:  /* Display 16bpp. */
2543                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2544                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2545                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2546                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2547                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2548                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2549                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2550                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2551                                 break;
2552                         case 12:  /* Display 32bpp. */
2553                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2554                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2555                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2556                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2557                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2558                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2559                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2560                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2561                                 break;
2562                         case 13:  /* Thin. */
2563                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2564                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2565                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2566                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2567                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2568                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2569                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2570                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2571                                 break;
2572                         case 14:  /* Thin 8 bpp. */
2573                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2574                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2575                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2576                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2577                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2578                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2579                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2580                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2581                                 break;
2582                         case 15:  /* Thin 16 bpp. */
2583                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2584                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2585                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2586                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2587                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2588                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2589                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2590                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2591                                 break;
2592                         case 16:  /* Thin 32 bpp. */
2593                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2594                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2595                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2596                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2597                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2598                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2599                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2600                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2601                                 break;
2602                         case 17:  /* Thin 64 bpp. */
2603                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2605                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2606                                                  TILE_SPLIT(split_equal_to_row_size) |
2607                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2608                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2610                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2611                                 break;
2612                         case 21:  /* 8 bpp PRT. */
2613                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2614                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2615                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2616                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2617                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2618                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2619                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2620                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2621                                 break;
2622                         case 22:  /* 16 bpp PRT */
2623                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2624                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2625                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2626                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2627                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2628                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2630                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2631                                 break;
2632                         case 23:  /* 32 bpp PRT */
2633                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2634                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2635                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2636                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2637                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2638                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2640                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2641                                 break;
2642                         case 24:  /* 64 bpp PRT */
2643                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2644                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2645                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2646                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2647                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2648                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2649                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2650                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2651                                 break;
2652                         case 25:  /* 128 bpp PRT */
2653                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2655                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2656                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2657                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2658                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2659                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2660                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2661                                 break;
2662                         default:
2663                                 gb_tile_moden = 0;
2664                                 break;
2665                         }
2666                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2667                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2668                 }
2669         } else if ((rdev->family == CHIP_VERDE) ||
2670                    (rdev->family == CHIP_OLAND) ||
2671                    (rdev->family == CHIP_HAINAN)) {
2672                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2673                         switch (reg_offset) {
2674                         case 0:  /* non-AA compressed depth or any compressed stencil */
2675                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2676                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2677                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2678                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2679                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2680                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2681                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2682                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2683                                 break;
2684                         case 1:  /* 2xAA/4xAA compressed depth only */
2685                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2687                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2688                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2689                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2690                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2692                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2693                                 break;
2694                         case 2:  /* 8xAA compressed depth only */
2695                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2696                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2697                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2698                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2699                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2700                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2701                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2702                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2703                                 break;
2704                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2705                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2706                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2707                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2708                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2709                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2710                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2711                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2712                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2713                                 break;
2714                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2715                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2716                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2717                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2718                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2719                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2720                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2721                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2722                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2723                                 break;
2724                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2725                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2726                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2727                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2728                                                  TILE_SPLIT(split_equal_to_row_size) |
2729                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2730                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2731                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2732                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2733                                 break;
2734                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2735                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2736                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2737                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2738                                                  TILE_SPLIT(split_equal_to_row_size) |
2739                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2740                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2741                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2742                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2743                                 break;
2744                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2745                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2746                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2747                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2748                                                  TILE_SPLIT(split_equal_to_row_size) |
2749                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2750                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2752                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2753                                 break;
2754                         case 8:  /* 1D and 1D Array Surfaces */
2755                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2756                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2757                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2758                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2759                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2760                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2761                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2762                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2763                                 break;
2764                         case 9:  /* Displayable maps. */
2765                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2766                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2767                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2768                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2769                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2770                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2771                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2772                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2773                                 break;
2774                         case 10:  /* Display 8bpp. */
2775                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2776                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2777                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2779                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2780                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2782                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2783                                 break;
2784                         case 11:  /* Display 16bpp. */
2785                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2786                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2787                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2788                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2789                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2790                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2791                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2792                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2793                                 break;
2794                         case 12:  /* Display 32bpp. */
2795                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2796                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2797                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2798                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2799                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2800                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2801                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2802                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2803                                 break;
2804                         case 13:  /* Thin. */
2805                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2806                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2807                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2808                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2809                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2810                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2811                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2812                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2813                                 break;
2814                         case 14:  /* Thin 8 bpp. */
2815                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2816                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2817                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2818                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2819                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2820                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2821                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2822                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2823                                 break;
2824                         case 15:  /* Thin 16 bpp. */
2825                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2826                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2827                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2828                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2829                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2830                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2831                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2832                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2833                                 break;
2834                         case 16:  /* Thin 32 bpp. */
2835                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2836                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2837                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2838                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2839                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2840                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2842                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2843                                 break;
2844                         case 17:  /* Thin 64 bpp. */
2845                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2846                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2847                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2848                                                  TILE_SPLIT(split_equal_to_row_size) |
2849                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2850                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2851                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2852                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2853                                 break;
2854                         case 21:  /* 8 bpp PRT. */
2855                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2856                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2857                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2858                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2859                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2860                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2861                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2862                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2863                                 break;
2864                         case 22:  /* 16 bpp PRT */
2865                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2866                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2867                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2868                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2869                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2870                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2871                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2872                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2873                                 break;
2874                         case 23:  /* 32 bpp PRT */
2875                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2876                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2877                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2878                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2879                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2880                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2882                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2883                                 break;
2884                         case 24:  /* 64 bpp PRT */
2885                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2887                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2888                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2889                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2890                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2891                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2892                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2893                                 break;
2894                         case 25:  /* 128 bpp PRT */
2895                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2896                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2897                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2898                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2899                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2900                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2901                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2902                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2903                                 break;
2904                         default:
2905                                 gb_tile_moden = 0;
2906                                 break;
2907                         }
2908                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2909                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2910                 }
2911         } else
2912                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2913 }
2914
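/* Select which shader engine (SE) and shader array (SH) subsequent
 * indexed register accesses target, via GRBM_GFX_INDEX.  Passing
 * 0xffffffff for se_num and/or sh_num broadcasts writes to all
 * engines/arrays; instance writes are always broadcast here.
 */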
2915 static void si_select_se_sh(struct radeon_device *rdev,
2916                             u32 se_num, u32 sh_num)
2917 {
2918         u32 data = INSTANCE_BROADCAST_WRITES;
2919
2920         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2921                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2922         else if (se_num == 0xffffffff)
2923                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2924         else if (sh_num == 0xffffffff)
2925                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2926         else
2927                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2928         WREG32(GRBM_GFX_INDEX, data);
2929 }
2930
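/* Build a mask with the low bit_width bits set, e.g. bit_width = 4 -> 0xf. */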
2931 static u32 si_create_bitmask(u32 bit_width)
2932 {
2933         u32 i, mask = 0;
2934
2935         for (i = 0; i < bit_width; i++) {
2936                 mask <<= 1;
2937                 mask |= 1;
2938         }
2939         return mask;
2940 }
2941
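/* Return a bitmask (up to cu_per_sh bits) of the compute units usable in
 * the currently selected shader array: the harvested bits from
 * CC_GC_SHADER_ARRAY_CONFIG and the user-disabled bits from
 * GC_USER_SHADER_ARRAY_CONFIG are combined and then inverted.
 */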
2942 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2943 {
2944         u32 data, mask;
2945
2946         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2947         if (data & 1)
2948                 data &= INACTIVE_CUS_MASK;
2949         else
2950                 data = 0;
2951         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2952
2953         data >>= INACTIVE_CUS_SHIFT;
2954
2955         mask = si_create_bitmask(cu_per_sh);
2956
2957         return ~data & mask;
2958 }
2959
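/* For every shader array, clear the SPI_STATIC_THREAD_MGMT_3 bit matching
 * the first active CU that the probe loop hits, then restore broadcast
 * mode.  Note that the probe shifts its mask by the loop index, so only a
 * handful of bit positions (0, 1, 3, 6, 10, 15, ...) are actually tested.
 */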
2960 static void si_setup_spi(struct radeon_device *rdev,
2961                          u32 se_num, u32 sh_per_se,
2962                          u32 cu_per_sh)
2963 {
2964         int i, j, k;
2965         u32 data, mask, active_cu;
2966
2967         for (i = 0; i < se_num; i++) {
2968                 for (j = 0; j < sh_per_se; j++) {
2969                         si_select_se_sh(rdev, i, j);
2970                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2971                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2972
2973                         mask = 1;
2974                         for (k = 0; k < 16; k++) {
2975                                 mask <<= k;
2976                                 if (active_cu & mask) {
2977                                         data &= ~mask;
2978                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2979                                         break;
2980                                 }
2981                         }
2982                 }
2983         }
2984         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2985 }
2986
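/* Return a bitmask of the render backends (RBs) disabled in the currently
 * selected shader array, combining the harvested bits from
 * CC_RB_BACKEND_DISABLE with the user-disabled bits from
 * GC_USER_RB_BACKEND_DISABLE, limited to max_rb_num_per_se / sh_per_se bits.
 */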
2987 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2988                               u32 max_rb_num_per_se,
2989                               u32 sh_per_se)
2990 {
2991         u32 data, mask;
2992
2993         data = RREG32(CC_RB_BACKEND_DISABLE);
2994         if (data & 1)
2995                 data &= BACKEND_DISABLE_MASK;
2996         else
2997                 data = 0;
2998         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2999
3000         data >>= BACKEND_DISABLE_SHIFT;
3001
3002         mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3003
3004         return data & mask;
3005 }
3006
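/* Gather the per-SE/SH disabled-RB bits into one mask, record the
 * resulting enabled-RB mask in rdev->config.si.backend_enable_mask, and
 * program PA_SC_RASTER_CONFIG on each shader engine with an RB mapping
 * that matches which of its backends are enabled.
 */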
3007 static void si_setup_rb(struct radeon_device *rdev,
3008                         u32 se_num, u32 sh_per_se,
3009                         u32 max_rb_num_per_se)
3010 {
3011         int i, j;
3012         u32 data, mask;
3013         u32 disabled_rbs = 0;
3014         u32 enabled_rbs = 0;
3015
3016         for (i = 0; i < se_num; i++) {
3017                 for (j = 0; j < sh_per_se; j++) {
3018                         si_select_se_sh(rdev, i, j);
3019                         data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3020                         disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
3021                 }
3022         }
3023         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3024
3025         mask = 1;
3026         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3027                 if (!(disabled_rbs & mask))
3028                         enabled_rbs |= mask;
3029                 mask <<= 1;
3030         }
3031
3032         rdev->config.si.backend_enable_mask = enabled_rbs;
3033
3034         for (i = 0; i < se_num; i++) {
3035                 si_select_se_sh(rdev, i, 0xffffffff);
3036                 data = 0;
3037                 for (j = 0; j < sh_per_se; j++) {
3038                         switch (enabled_rbs & 3) {
3039                         case 1:
3040                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3041                                 break;
3042                         case 2:
3043                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3044                                 break;
3045                         case 3:
3046                         default:
3047                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3048                                 break;
3049                         }
3050                         enabled_rbs >>= 2;
3051                 }
3052                 WREG32(PA_SC_RASTER_CONFIG, data);
3053         }
3054         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3055 }
3056
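/* One-time graphics block setup: pick the per-ASIC limits, derive the
 * tiling configuration from the memory controller settings, program the
 * address-config and tiling registers, configure the RBs and SPI CU
 * masks, count the active CUs, and load the remaining 3D engine defaults.
 */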
3057 static void si_gpu_init(struct radeon_device *rdev)
3058 {
3059         u32 gb_addr_config = 0;
3060         u32 mc_shared_chmap, mc_arb_ramcfg;
3061         u32 sx_debug_1;
3062         u32 hdp_host_path_cntl;
3063         u32 tmp;
3064         int i, j;
3065
3066         switch (rdev->family) {
3067         case CHIP_TAHITI:
3068                 rdev->config.si.max_shader_engines = 2;
3069                 rdev->config.si.max_tile_pipes = 12;
3070                 rdev->config.si.max_cu_per_sh = 8;
3071                 rdev->config.si.max_sh_per_se = 2;
3072                 rdev->config.si.max_backends_per_se = 4;
3073                 rdev->config.si.max_texture_channel_caches = 12;
3074                 rdev->config.si.max_gprs = 256;
3075                 rdev->config.si.max_gs_threads = 32;
3076                 rdev->config.si.max_hw_contexts = 8;
3077
3078                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3079                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3080                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3081                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3082                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3083                 break;
3084         case CHIP_PITCAIRN:
3085                 rdev->config.si.max_shader_engines = 2;
3086                 rdev->config.si.max_tile_pipes = 8;
3087                 rdev->config.si.max_cu_per_sh = 5;
3088                 rdev->config.si.max_sh_per_se = 2;
3089                 rdev->config.si.max_backends_per_se = 4;
3090                 rdev->config.si.max_texture_channel_caches = 8;
3091                 rdev->config.si.max_gprs = 256;
3092                 rdev->config.si.max_gs_threads = 32;
3093                 rdev->config.si.max_hw_contexts = 8;
3094
3095                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3096                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3097                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3098                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3099                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3100                 break;
3101         case CHIP_VERDE:
3102         default:
3103                 rdev->config.si.max_shader_engines = 1;
3104                 rdev->config.si.max_tile_pipes = 4;
3105                 rdev->config.si.max_cu_per_sh = 5;
3106                 rdev->config.si.max_sh_per_se = 2;
3107                 rdev->config.si.max_backends_per_se = 4;
3108                 rdev->config.si.max_texture_channel_caches = 4;
3109                 rdev->config.si.max_gprs = 256;
3110                 rdev->config.si.max_gs_threads = 32;
3111                 rdev->config.si.max_hw_contexts = 8;
3112
3113                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3114                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3115                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3116                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3117                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3118                 break;
3119         case CHIP_OLAND:
3120                 rdev->config.si.max_shader_engines = 1;
3121                 rdev->config.si.max_tile_pipes = 4;
3122                 rdev->config.si.max_cu_per_sh = 6;
3123                 rdev->config.si.max_sh_per_se = 1;
3124                 rdev->config.si.max_backends_per_se = 2;
3125                 rdev->config.si.max_texture_channel_caches = 4;
3126                 rdev->config.si.max_gprs = 256;
3127                 rdev->config.si.max_gs_threads = 16;
3128                 rdev->config.si.max_hw_contexts = 8;
3129
3130                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3131                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3132                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3133                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3134                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3135                 break;
3136         case CHIP_HAINAN:
3137                 rdev->config.si.max_shader_engines = 1;
3138                 rdev->config.si.max_tile_pipes = 4;
3139                 rdev->config.si.max_cu_per_sh = 5;
3140                 rdev->config.si.max_sh_per_se = 1;
3141                 rdev->config.si.max_backends_per_se = 1;
3142                 rdev->config.si.max_texture_channel_caches = 2;
3143                 rdev->config.si.max_gprs = 256;
3144                 rdev->config.si.max_gs_threads = 16;
3145                 rdev->config.si.max_hw_contexts = 8;
3146
3147                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3148                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3149                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3150                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3151                 gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
3152                 break;
3153         }
3154
3155         /* Initialize HDP */
3156         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3157                 WREG32((0x2c14 + j), 0x00000000);
3158                 WREG32((0x2c18 + j), 0x00000000);
3159                 WREG32((0x2c1c + j), 0x00000000);
3160                 WREG32((0x2c20 + j), 0x00000000);
3161                 WREG32((0x2c24 + j), 0x00000000);
3162         }
3163
3164         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3165
3166         evergreen_fix_pci_max_read_req_size(rdev);
3167
3168         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3169
3170         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3171         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3172
3173         rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3174         rdev->config.si.mem_max_burst_length_bytes = 256;
3175         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3176         rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3177         if (rdev->config.si.mem_row_size_in_kb > 4)
3178                 rdev->config.si.mem_row_size_in_kb = 4;
3179         /* XXX use MC settings? */
3180         rdev->config.si.shader_engine_tile_size = 32;
3181         rdev->config.si.num_gpus = 1;
3182         rdev->config.si.multi_gpu_tile_size = 64;
3183
3184         /* fix up row size */
3185         gb_addr_config &= ~ROW_SIZE_MASK;
3186         switch (rdev->config.si.mem_row_size_in_kb) {
3187         case 1:
3188         default:
3189                 gb_addr_config |= ROW_SIZE(0);
3190                 break;
3191         case 2:
3192                 gb_addr_config |= ROW_SIZE(1);
3193                 break;
3194         case 4:
3195                 gb_addr_config |= ROW_SIZE(2);
3196                 break;
3197         }
3198
3199         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3200          * not have bank info, so create a custom tiling dword.
3201          * bits 3:0   num_pipes
3202          * bits 7:4   num_banks
3203          * bits 11:8  group_size
3204          * bits 15:12 row_size
3205          */
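        /* For example, a part with 8 tile pipes (field value 3) and 16
         * banks (field value 2) packs (3 << 0) | (2 << 4) = 0x23 into the
         * low byte; the group_size and row_size nibbles are then copied
         * from the PIPE_INTERLEAVE_SIZE and ROW_SIZE fields of
         * gb_addr_config below.
         */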
3206         rdev->config.si.tile_config = 0;
3207         switch (rdev->config.si.num_tile_pipes) {
3208         case 1:
3209                 rdev->config.si.tile_config |= (0 << 0);
3210                 break;
3211         case 2:
3212                 rdev->config.si.tile_config |= (1 << 0);
3213                 break;
3214         case 4:
3215                 rdev->config.si.tile_config |= (2 << 0);
3216                 break;
3217         case 8:
3218         default:
3219                 /* XXX what about 12? */
3220                 rdev->config.si.tile_config |= (3 << 0);
3221                 break;
3222         }
3223         switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3224         case 0: /* four banks */
3225                 rdev->config.si.tile_config |= 0 << 4;
3226                 break;
3227         case 1: /* eight banks */
3228                 rdev->config.si.tile_config |= 1 << 4;
3229                 break;
3230         case 2: /* sixteen banks */
3231         default:
3232                 rdev->config.si.tile_config |= 2 << 4;
3233                 break;
3234         }
3235         rdev->config.si.tile_config |=
3236                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3237         rdev->config.si.tile_config |=
3238                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3239
3240         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3241         WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3242         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3243         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3244         WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3245         WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3246         if (rdev->has_uvd) {
3247                 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3248                 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3249                 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3250         }
3251
3252         si_tiling_mode_table_init(rdev);
3253
3254         si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3255                     rdev->config.si.max_sh_per_se,
3256                     rdev->config.si.max_backends_per_se);
3257
3258         si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3259                      rdev->config.si.max_sh_per_se,
3260                      rdev->config.si.max_cu_per_sh);
3261
3262         rdev->config.si.active_cus = 0;
3263         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
3264                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
3265                         rdev->config.si.active_cus +=
3266                                 hweight32(si_get_cu_active_bitmap(rdev, i, j));
3267                 }
3268         }
3269
3270         /* set HW defaults for 3D engine */
3271         WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3272                                      ROQ_IB2_START(0x2b)));
3273         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3274
3275         sx_debug_1 = RREG32(SX_DEBUG_1);
3276         WREG32(SX_DEBUG_1, sx_debug_1);
3277
3278         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3279
3280         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3281                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3282                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3283                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3284
3285         WREG32(VGT_NUM_INSTANCES, 1);
3286
3287         WREG32(CP_PERFMON_CNTL, 0);
3288
3289         WREG32(SQ_CONFIG, 0);
3290
3291         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3292                                           FORCE_EOV_MAX_REZ_CNT(255)));
3293
3294         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3295                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3296
3297         WREG32(VGT_GS_VERTEX_REUSE, 16);
3298         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3299
3300         WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3301         WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3302         WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3303         WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3304         WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3305         WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3306         WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3307         WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3308
3309         tmp = RREG32(HDP_MISC_CNTL);
3310         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3311         WREG32(HDP_MISC_CNTL, tmp);
3312
3313         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3314         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3315
3316         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3317
3318         udelay(50);
3319 }
3320
3321 /*
3322  * GPU scratch register helper functions.
3323  */
3324 static void si_scratch_init(struct radeon_device *rdev)
3325 {
3326         int i;
3327
3328         rdev->scratch.num_reg = 7;
3329         rdev->scratch.reg_base = SCRATCH_REG0;
3330         for (i = 0; i < rdev->scratch.num_reg; i++) {
3331                 rdev->scratch.free[i] = true;
3332                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3333         }
3334 }
3335
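/* Emit a fence on the gfx ring: flush the read caches over the GART,
 * then use EVENT_WRITE_EOP to write the fence sequence number to the
 * fence address and signal an interrupt once the caches have been
 * flushed and invalidated.
 */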
3336 void si_fence_ring_emit(struct radeon_device *rdev,
3337                         struct radeon_fence *fence)
3338 {
3339         struct radeon_ring *ring = &rdev->ring[fence->ring];
3340         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3341
3342         /* flush read cache over gart */
3343         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3344         radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3345         radeon_ring_write(ring, 0);
3346         radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3347         radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3348                           PACKET3_TC_ACTION_ENA |
3349                           PACKET3_SH_KCACHE_ACTION_ENA |
3350                           PACKET3_SH_ICACHE_ACTION_ENA);
3351         radeon_ring_write(ring, 0xFFFFFFFF);
3352         radeon_ring_write(ring, 0);
3353         radeon_ring_write(ring, 10); /* poll interval */
3354         /* EVENT_WRITE_EOP - flush caches, send int */
3355         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3356         radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3357         radeon_ring_write(ring, lower_32_bits(addr));
3358         radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3359         radeon_ring_write(ring, fence->seq);
3360         radeon_ring_write(ring, 0);
3361 }
3362
3363 /*
3364  * Indirect buffer (IB) handling
3365  */
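/* Schedule an indirect buffer (IB) on the given ring.  Const IBs are
 * preceded by a SWITCH_BUFFER packet and use INDIRECT_BUFFER_CONST;
 * normal IBs first record the expected read pointer (via the rptr save
 * scratch register or the writeback buffer) and are followed by a read
 * cache flush for the IB's VM id.
 */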
3366 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3367 {
3368         struct radeon_ring *ring = &rdev->ring[ib->ring];
3369         unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3370         u32 header;
3371
3372         if (ib->is_const_ib) {
3373                 /* set switch buffer packet before const IB */
3374                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3375                 radeon_ring_write(ring, 0);
3376
3377                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3378         } else {
3379                 u32 next_rptr;
3380                 if (ring->rptr_save_reg) {
3381                         next_rptr = ring->wptr + 3 + 4 + 8;
3382                         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3383                         radeon_ring_write(ring, ((ring->rptr_save_reg -
3384                                                   PACKET3_SET_CONFIG_REG_START) >> 2));
3385                         radeon_ring_write(ring, next_rptr);
3386                 } else if (rdev->wb.enabled) {
3387                         next_rptr = ring->wptr + 5 + 4 + 8;
3388                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3389                         radeon_ring_write(ring, (1 << 8));
3390                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3391                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3392                         radeon_ring_write(ring, next_rptr);
3393                 }
3394
3395                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3396         }
3397
3398         radeon_ring_write(ring, header);
3399         radeon_ring_write(ring,
3400 #ifdef __BIG_ENDIAN
3401                           (2 << 0) |
3402 #endif
3403                           (ib->gpu_addr & 0xFFFFFFFC));
3404         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3405         radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
3406
3407         if (!ib->is_const_ib) {
3408                 /* flush read cache over gart for this vmid */
3409                 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3410                 radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3411                 radeon_ring_write(ring, vm_id);
3412                 radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3413                 radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3414                                   PACKET3_TC_ACTION_ENA |
3415                                   PACKET3_SH_KCACHE_ACTION_ENA |
3416                                   PACKET3_SH_ICACHE_ACTION_ENA);
3417                 radeon_ring_write(ring, 0xFFFFFFFF);
3418                 radeon_ring_write(ring, 0);
3419                 radeon_ring_write(ring, 10); /* poll interval */
3420         }
3421 }
3422
3423 /*
3424  * CP.
3425  */
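/* Halt or un-halt the CP micro engines (ME, PFP, CE).  When halting,
 * the gfx and compute rings are also marked as not ready.
 */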
3426 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3427 {
3428         if (enable)
3429                 WREG32(CP_ME_CNTL, 0);
3430         else {
3431                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3432                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3433                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3434                 WREG32(SCRATCH_UMSK, 0);
3435                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3436                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3437                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3438         }
3439         udelay(50);
3440 }
3441
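/* Halt the CP and load the PFP, CE and ME microcode into their ucode
 * RAMs, handling both the new header-wrapped little-endian firmware
 * images and the legacy big-endian ones.
 */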
3442 static int si_cp_load_microcode(struct radeon_device *rdev)
3443 {
3444         int i;
3445
3446         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3447                 return -EINVAL;
3448
3449         si_cp_enable(rdev, false);
3450
3451         if (rdev->new_fw) {
3452                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3453                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3454                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3455                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3456                 const struct gfx_firmware_header_v1_0 *me_hdr =
3457                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3458                 const __le32 *fw_data;
3459                 u32 fw_size;
3460
3461                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3462                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3463                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3464
3465                 /* PFP */
3466                 fw_data = (const __le32 *)
3467                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3468                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3469                 WREG32(CP_PFP_UCODE_ADDR, 0);
3470                 for (i = 0; i < fw_size; i++)
3471                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3472                 WREG32(CP_PFP_UCODE_ADDR, 0);
3473
3474                 /* CE */
3475                 fw_data = (const __le32 *)
3476                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3477                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3478                 WREG32(CP_CE_UCODE_ADDR, 0);
3479                 for (i = 0; i < fw_size; i++)
3480                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3481                 WREG32(CP_CE_UCODE_ADDR, 0);
3482
3483                 /* ME */
3484                 fw_data = (const __le32 *)
3485                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3486                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3487                 WREG32(CP_ME_RAM_WADDR, 0);
3488                 for (i = 0; i < fw_size; i++)
3489                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3490                 WREG32(CP_ME_RAM_WADDR, 0);
3491         } else {
3492                 const __be32 *fw_data;
3493
3494                 /* PFP */
3495                 fw_data = (const __be32 *)rdev->pfp_fw->data;
3496                 WREG32(CP_PFP_UCODE_ADDR, 0);
3497                 for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3498                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3499                 WREG32(CP_PFP_UCODE_ADDR, 0);
3500
3501                 /* CE */
3502                 fw_data = (const __be32 *)rdev->ce_fw->data;
3503                 WREG32(CP_CE_UCODE_ADDR, 0);
3504                 for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3505                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3506                 WREG32(CP_CE_UCODE_ADDR, 0);
3507
3508                 /* ME */
3509                 fw_data = (const __be32 *)rdev->me_fw->data;
3510                 WREG32(CP_ME_RAM_WADDR, 0);
3511                 for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3512                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3513                 WREG32(CP_ME_RAM_WADDR, 0);
3514         }
3515
3516         WREG32(CP_PFP_UCODE_ADDR, 0);
3517         WREG32(CP_CE_UCODE_ADDR, 0);
3518         WREG32(CP_ME_RAM_WADDR, 0);
3519         WREG32(CP_ME_RAM_RADDR, 0);
3520         return 0;
3521 }
3522
3523 static int si_cp_start(struct radeon_device *rdev)
3524 {
3525         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3526         int r, i;
3527
3528         r = radeon_ring_lock(rdev, ring, 7 + 4);
3529         if (r) {
3530                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3531                 return r;
3532         }
3533         /* init the CP */
3534         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3535         radeon_ring_write(ring, 0x1);
3536         radeon_ring_write(ring, 0x0);
3537         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3538         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3539         radeon_ring_write(ring, 0);
3540         radeon_ring_write(ring, 0);
3541
3542         /* init the CE partitions */
3543         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3544         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3545         radeon_ring_write(ring, 0xc000);
3546         radeon_ring_write(ring, 0xe000);
3547         radeon_ring_unlock_commit(rdev, ring, false);
3548
3549         si_cp_enable(rdev, true);
3550
3551         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3552         if (r) {
3553                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3554                 return r;
3555         }
3556
3557         /* setup clear context state */
3558         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3559         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3560
3561         for (i = 0; i < si_default_size; i++)
3562                 radeon_ring_write(ring, si_default_state[i]);
3563
3564         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3565         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3566
3567         /* set clear context state */
3568         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3569         radeon_ring_write(ring, 0);
3570
3571         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3572         radeon_ring_write(ring, 0x00000316);
3573         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3574         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3575
3576         radeon_ring_unlock_commit(rdev, ring, false);
3577
3578         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3579                 ring = &rdev->ring[i];
3580                 r = radeon_ring_lock(rdev, ring, 2);
                     if (r) {
                             DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
                             return r;
                     }
3581
3582                 /* clear the compute context state */
3583                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3584                 radeon_ring_write(ring, 0);
3585
3586                 radeon_ring_unlock_commit(rdev, ring, false);
3587         }
3588
3589         return 0;
3590 }
3591
3592 static void si_cp_fini(struct radeon_device *rdev)
3593 {
3594         struct radeon_ring *ring;
3595         si_cp_enable(rdev, false);
3596
3597         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3598         radeon_ring_fini(rdev, ring);
3599         radeon_scratch_free(rdev, ring->rptr_save_reg);
3600
3601         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3602         radeon_ring_fini(rdev, ring);
3603         radeon_scratch_free(rdev, ring->rptr_save_reg);
3604
3605         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3606         radeon_ring_fini(rdev, ring);
3607         radeon_scratch_free(rdev, ring->rptr_save_reg);
3608 }
3609
3610 static int si_cp_resume(struct radeon_device *rdev)
3611 {
3612         struct radeon_ring *ring;
3613         u32 tmp;
3614         u32 rb_bufsz;
3615         int r;
3616
3617         si_enable_gui_idle_interrupt(rdev, false);
3618
3619         WREG32(CP_SEM_WAIT_TIMER, 0x0);
3620         WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3621
3622         /* Set the write pointer delay */
3623         WREG32(CP_RB_WPTR_DELAY, 0);
3624
3625         WREG32(CP_DEBUG, 0);
3626         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3627
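             /* All three CP rings are brought up the same way: program the ring
              * size, reset the read/write pointers, point the rptr writeback at
              * the WB buffer and finally program the ring base address.
              */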
3628         /* ring 0 - compute and gfx */
3629         /* Set ring buffer size */
3630         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3631         rb_bufsz = order_base_2(ring->ring_size / 8);
3632         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3633 #ifdef __BIG_ENDIAN
3634         tmp |= BUF_SWAP_32BIT;
3635 #endif
3636         WREG32(CP_RB0_CNTL, tmp);
3637
3638         /* Initialize the ring buffer's read and write pointers */
3639         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3640         ring->wptr = 0;
3641         WREG32(CP_RB0_WPTR, ring->wptr);
3642
3643         /* set the wb address whether it's enabled or not */
3644         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3645         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3646
3647         if (rdev->wb.enabled)
3648                 WREG32(SCRATCH_UMSK, 0xff);
3649         else {
3650                 tmp |= RB_NO_UPDATE;
3651                 WREG32(SCRATCH_UMSK, 0);
3652         }
3653
3654         mdelay(1);
3655         WREG32(CP_RB0_CNTL, tmp);
3656
3657         WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3658
3659         /* ring1  - compute only */
3660         /* Set ring buffer size */
3661         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3662         rb_bufsz = order_base_2(ring->ring_size / 8);
3663         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3664 #ifdef __BIG_ENDIAN
3665         tmp |= BUF_SWAP_32BIT;
3666 #endif
3667         WREG32(CP_RB1_CNTL, tmp);
3668
3669         /* Initialize the ring buffer's read and write pointers */
3670         WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3671         ring->wptr = 0;
3672         WREG32(CP_RB1_WPTR, ring->wptr);
3673
3674         /* set the wb address whether it's enabled or not */
3675         WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3676         WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3677
3678         mdelay(1);
3679         WREG32(CP_RB1_CNTL, tmp);
3680
3681         WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3682
3683         /* ring2 - compute only */
3684         /* Set ring buffer size */
3685         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3686         rb_bufsz = order_base_2(ring->ring_size / 8);
3687         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3688 #ifdef __BIG_ENDIAN
3689         tmp |= BUF_SWAP_32BIT;
3690 #endif
3691         WREG32(CP_RB2_CNTL, tmp);
3692
3693         /* Initialize the ring buffer's read and write pointers */
3694         WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3695         ring->wptr = 0;
3696         WREG32(CP_RB2_WPTR, ring->wptr);
3697
3698         /* set the wb address whether it's enabled or not */
3699         WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3700         WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3701
3702         mdelay(1);
3703         WREG32(CP_RB2_CNTL, tmp);
3704
3705         WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3706
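             /* Mark all three rings ready before running the ring tests below;
              * any ring that fails its test is flagged not ready again.
              */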
3707         /* start the rings */
3708         si_cp_start(rdev);
3709         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3710         rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3711         rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3712         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3713         if (r) {
3714                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3715                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3716                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3717                 return r;
3718         }
3719         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3720         if (r) {
3721                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3722         }
3723         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3724         if (r) {
3725                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3726         }
3727
3728         si_enable_gui_idle_interrupt(rdev, true);
3729
3730         if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3731                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3732
3733         return 0;
3734 }
3735
3736 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3737 {
3738         u32 reset_mask = 0;
3739         u32 tmp;
3740
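             /* Sample the various status registers and accumulate a mask of the
              * blocks that still look busy or hung; callers use this mask to
              * decide which soft resets to assert or whether an engine is
              * really locked up.
              */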
3741         /* GRBM_STATUS */
3742         tmp = RREG32(GRBM_STATUS);
3743         if (tmp & (PA_BUSY | SC_BUSY |
3744                    BCI_BUSY | SX_BUSY |
3745                    TA_BUSY | VGT_BUSY |
3746                    DB_BUSY | CB_BUSY |
3747                    GDS_BUSY | SPI_BUSY |
3748                    IA_BUSY | IA_BUSY_NO_DMA))
3749                 reset_mask |= RADEON_RESET_GFX;
3750
3751         if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3752                    CP_BUSY | CP_COHERENCY_BUSY))
3753                 reset_mask |= RADEON_RESET_CP;
3754
3755         if (tmp & GRBM_EE_BUSY)
3756                 reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3757
3758         /* GRBM_STATUS2 */
3759         tmp = RREG32(GRBM_STATUS2);
3760         if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3761                 reset_mask |= RADEON_RESET_RLC;
3762
3763         /* DMA_STATUS_REG 0 */
3764         tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3765         if (!(tmp & DMA_IDLE))
3766                 reset_mask |= RADEON_RESET_DMA;
3767
3768         /* DMA_STATUS_REG 1 */
3769         tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3770         if (!(tmp & DMA_IDLE))
3771                 reset_mask |= RADEON_RESET_DMA1;
3772
3773         /* SRBM_STATUS2 */
3774         tmp = RREG32(SRBM_STATUS2);
3775         if (tmp & DMA_BUSY)
3776                 reset_mask |= RADEON_RESET_DMA;
3777
3778         if (tmp & DMA1_BUSY)
3779                 reset_mask |= RADEON_RESET_DMA1;
3780
3781         /* SRBM_STATUS */
3782         tmp = RREG32(SRBM_STATUS);
3783
3784         if (tmp & IH_BUSY)
3785                 reset_mask |= RADEON_RESET_IH;
3786
3787         if (tmp & SEM_BUSY)
3788                 reset_mask |= RADEON_RESET_SEM;
3789
3790         if (tmp & GRBM_RQ_PENDING)
3791                 reset_mask |= RADEON_RESET_GRBM;
3792
3793         if (tmp & VMC_BUSY)
3794                 reset_mask |= RADEON_RESET_VMC;
3795
3796         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3797                    MCC_BUSY | MCD_BUSY))
3798                 reset_mask |= RADEON_RESET_MC;
3799
3800         if (evergreen_is_display_hung(rdev))
3801                 reset_mask |= RADEON_RESET_DISPLAY;
3802
3803         /* VM_L2_STATUS */
3804         tmp = RREG32(VM_L2_STATUS);
3805         if (tmp & L2_BUSY)
3806                 reset_mask |= RADEON_RESET_VMC;
3807
3808         /* Skip MC reset as it's most likely not hung, just busy */
3809         if (reset_mask & RADEON_RESET_MC) {
3810                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3811                 reset_mask &= ~RADEON_RESET_MC;
3812         }
3813
3814         return reset_mask;
3815 }
3816
3817 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3818 {
3819         struct evergreen_mc_save save;
3820         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3821         u32 tmp;
3822
3823         if (reset_mask == 0)
3824                 return;
3825
3826         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3827
3828         evergreen_print_gpu_status_regs(rdev);
3829         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3830                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3831         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3832                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3833
3834         /* disable PG/CG */
3835         si_fini_pg(rdev);
3836         si_fini_cg(rdev);
3837
3838         /* stop the rlc */
3839         si_rlc_stop(rdev);
3840
3841         /* Disable CP parsing/prefetching */
3842         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3843
3844         if (reset_mask & RADEON_RESET_DMA) {
3845                 /* dma0 */
3846                 tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3847                 tmp &= ~DMA_RB_ENABLE;
3848                 WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3849         }
3850         if (reset_mask & RADEON_RESET_DMA1) {
3851                 /* dma1 */
3852                 tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3853                 tmp &= ~DMA_RB_ENABLE;
3854                 WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3855         }
3856
3857         udelay(50);
3858
3859         evergreen_mc_stop(rdev, &save);
3860         if (evergreen_mc_wait_for_idle(rdev)) {
3861                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3862         }
3863
3864         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3865                 grbm_soft_reset = SOFT_RESET_CB |
3866                         SOFT_RESET_DB |
3867                         SOFT_RESET_GDS |
3868                         SOFT_RESET_PA |
3869                         SOFT_RESET_SC |
3870                         SOFT_RESET_BCI |
3871                         SOFT_RESET_SPI |
3872                         SOFT_RESET_SX |
3873                         SOFT_RESET_TC |
3874                         SOFT_RESET_TA |
3875                         SOFT_RESET_VGT |
3876                         SOFT_RESET_IA;
3877         }
3878
3879         if (reset_mask & RADEON_RESET_CP) {
3880                 grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3881
3882                 srbm_soft_reset |= SOFT_RESET_GRBM;
3883         }
3884
3885         if (reset_mask & RADEON_RESET_DMA)
3886                 srbm_soft_reset |= SOFT_RESET_DMA;
3887
3888         if (reset_mask & RADEON_RESET_DMA1)
3889                 srbm_soft_reset |= SOFT_RESET_DMA1;
3890
3891         if (reset_mask & RADEON_RESET_DISPLAY)
3892                 srbm_soft_reset |= SOFT_RESET_DC;
3893
3894         if (reset_mask & RADEON_RESET_RLC)
3895                 grbm_soft_reset |= SOFT_RESET_RLC;
3896
3897         if (reset_mask & RADEON_RESET_SEM)
3898                 srbm_soft_reset |= SOFT_RESET_SEM;
3899
3900         if (reset_mask & RADEON_RESET_IH)
3901                 srbm_soft_reset |= SOFT_RESET_IH;
3902
3903         if (reset_mask & RADEON_RESET_GRBM)
3904                 srbm_soft_reset |= SOFT_RESET_GRBM;
3905
3906         if (reset_mask & RADEON_RESET_VMC)
3907                 srbm_soft_reset |= SOFT_RESET_VMC;
3908
3909         if (reset_mask & RADEON_RESET_MC)
3910                 srbm_soft_reset |= SOFT_RESET_MC;
3911
3912         if (grbm_soft_reset) {
3913                 tmp = RREG32(GRBM_SOFT_RESET);
3914                 tmp |= grbm_soft_reset;
3915                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3916                 WREG32(GRBM_SOFT_RESET, tmp);
3917                 tmp = RREG32(GRBM_SOFT_RESET);
3918
3919                 udelay(50);
3920
3921                 tmp &= ~grbm_soft_reset;
3922                 WREG32(GRBM_SOFT_RESET, tmp);
3923                 tmp = RREG32(GRBM_SOFT_RESET);
3924         }
3925
3926         if (srbm_soft_reset) {
3927                 tmp = RREG32(SRBM_SOFT_RESET);
3928                 tmp |= srbm_soft_reset;
3929                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3930                 WREG32(SRBM_SOFT_RESET, tmp);
3931                 tmp = RREG32(SRBM_SOFT_RESET);
3932
3933                 udelay(50);
3934
3935                 tmp &= ~srbm_soft_reset;
3936                 WREG32(SRBM_SOFT_RESET, tmp);
3937                 tmp = RREG32(SRBM_SOFT_RESET);
3938         }
3939
3940         /* Wait a little for things to settle down */
3941         udelay(50);
3942
3943         evergreen_mc_resume(rdev, &save);
3944         udelay(50);
3945
3946         evergreen_print_gpu_status_regs(rdev);
3947 }
3948
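     /* Switch SCLK onto the SPLL bypass path and take MCLK off the MPLL so the
      * PLLs can be safely powered down before a PCI config reset.
      */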
3949 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3950 {
3951         u32 tmp, i;
3952
3953         tmp = RREG32(CG_SPLL_FUNC_CNTL);
3954         tmp |= SPLL_BYPASS_EN;
3955         WREG32(CG_SPLL_FUNC_CNTL, tmp);
3956
3957         tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3958         tmp |= SPLL_CTLREQ_CHG;
3959         WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3960
3961         for (i = 0; i < rdev->usec_timeout; i++) {
3962                 if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
3963                         break;
3964                 udelay(1);
3965         }
3966
3967         tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3968         tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
3969         WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3970
3971         tmp = RREG32(MPLL_CNTL_MODE);
3972         tmp &= ~MPLL_MCLK_SEL;
3973         WREG32(MPLL_CNTL_MODE, tmp);
3974 }
3975
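     /* With the clocks already in bypass, take software control of the SPLL,
      * put it into reset and sleep, then hand control back.
      */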
3976 static void si_spll_powerdown(struct radeon_device *rdev)
3977 {
3978         u32 tmp;
3979
3980         tmp = RREG32(SPLL_CNTL_MODE);
3981         tmp |= SPLL_SW_DIR_CONTROL;
3982         WREG32(SPLL_CNTL_MODE, tmp);
3983
3984         tmp = RREG32(CG_SPLL_FUNC_CNTL);
3985         tmp |= SPLL_RESET;
3986         WREG32(CG_SPLL_FUNC_CNTL, tmp);
3987
3988         tmp = RREG32(CG_SPLL_FUNC_CNTL);
3989         tmp |= SPLL_SLEEP;
3990         WREG32(CG_SPLL_FUNC_CNTL, tmp);
3991
3992         tmp = RREG32(SPLL_CNTL_MODE);
3993         tmp &= ~SPLL_SW_DIR_CONTROL;
3994         WREG32(SPLL_CNTL_MODE, tmp);
3995 }
3996
3997 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
3998 {
3999         struct evergreen_mc_save save;
4000         u32 tmp, i;
4001
4002         dev_info(rdev->dev, "GPU pci config reset\n");
4003
4004         /* disable dpm? */
4005
4006         /* disable cg/pg */
4007         si_fini_pg(rdev);
4008         si_fini_cg(rdev);
4009
4010         /* Disable CP parsing/prefetching */
4011         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4012         /* dma0 */
4013         tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
4014         tmp &= ~DMA_RB_ENABLE;
4015         WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
4016         /* dma1 */
4017         tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
4018         tmp &= ~DMA_RB_ENABLE;
4019         WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
4020         /* XXX other engines? */
4021
4022         /* halt the rlc, disable cp internal ints */
4023         si_rlc_stop(rdev);
4024
4025         udelay(50);
4026
4027         /* disable mem access */
4028         evergreen_mc_stop(rdev, &save);
4029         if (evergreen_mc_wait_for_idle(rdev)) {
4030                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4031         }
4032
4033         /* set mclk/sclk to bypass */
4034         si_set_clk_bypass_mode(rdev);
4035         /* powerdown spll */
4036         si_spll_powerdown(rdev);
4037         /* disable BM */
4038         pci_clear_master(rdev->pdev);
4039         /* reset */
4040         radeon_pci_config_reset(rdev);
4041         /* wait for asic to come out of reset */
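             /* CONFIG_MEMSIZE reads back as 0xffffffff while the chip is still
              * held in reset; a sane value means the ASIC is accessible again.
              */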
4042         for (i = 0; i < rdev->usec_timeout; i++) {
4043                 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
4044                         break;
4045                 udelay(1);
4046         }
4047 }
4048
4049 int si_asic_reset(struct radeon_device *rdev)
4050 {
4051         u32 reset_mask;
4052
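             /* Flag the engine as hung in the BIOS scratch regs, try a soft
              * reset of the affected blocks, and escalate to a PCI config reset
              * if something is still hung and hard reset is allowed
              * (radeon_hard_reset).
              */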
4053         reset_mask = si_gpu_check_soft_reset(rdev);
4054
4055         if (reset_mask)
4056                 r600_set_bios_scratch_engine_hung(rdev, true);
4057
4058         /* try soft reset */
4059         si_gpu_soft_reset(rdev, reset_mask);
4060
4061         reset_mask = si_gpu_check_soft_reset(rdev);
4062
4063         /* try pci config reset */
4064         if (reset_mask && radeon_hard_reset)
4065                 si_gpu_pci_config_reset(rdev);
4066
4067         reset_mask = si_gpu_check_soft_reset(rdev);
4068
4069         if (!reset_mask)
4070                 r600_set_bios_scratch_engine_hung(rdev, false);
4071
4072         return 0;
4073 }
4074
4075 /**
4076  * si_gfx_is_lockup - Check if the GFX engine is locked up
4077  *
4078  * @rdev: radeon_device pointer
4079  * @ring: radeon_ring structure holding ring information
4080  *
4081  * Check if the GFX engine is locked up.
4082  * Returns true if the engine appears to be locked up, false if not.
4083  */
4084 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4085 {
4086         u32 reset_mask = si_gpu_check_soft_reset(rdev);
4087
4088         if (!(reset_mask & (RADEON_RESET_GFX |
4089                             RADEON_RESET_COMPUTE |
4090                             RADEON_RESET_CP))) {
4091                 radeon_ring_lockup_update(rdev, ring);
4092                 return false;
4093         }
4094         return radeon_ring_test_lockup(rdev, ring);
4095 }
4096
4097 /* MC */
4098 static void si_mc_program(struct radeon_device *rdev)
4099 {
4100         struct evergreen_mc_save save;
4101         u32 tmp;
4102         int i, j;
4103
4104         /* Initialize HDP */
4105         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4106                 WREG32((0x2c14 + j), 0x00000000);
4107                 WREG32((0x2c18 + j), 0x00000000);
4108                 WREG32((0x2c1c + j), 0x00000000);
4109                 WREG32((0x2c20 + j), 0x00000000);
4110                 WREG32((0x2c24 + j), 0x00000000);
4111         }
4112         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4113
4114         evergreen_mc_stop(rdev, &save);
4115         if (radeon_mc_wait_for_idle(rdev)) {
4116                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4117         }
4118         if (!ASIC_IS_NODCE(rdev))
4119                 /* Lock out access through the VGA aperture */
4120                 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4121         /* Update configuration */
4122         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4123                rdev->mc.vram_start >> 12);
4124         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4125                rdev->mc.vram_end >> 12);
4126         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4127                rdev->vram_scratch.gpu_addr >> 12);
4128         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4129         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4130         WREG32(MC_VM_FB_LOCATION, tmp);
4131         /* XXX double check these! */
4132         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4133         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4134         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4135         WREG32(MC_VM_AGP_BASE, 0);
4136         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4137         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4138         if (radeon_mc_wait_for_idle(rdev)) {
4139                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4140         }
4141         evergreen_mc_resume(rdev, &save);
4142         if (!ASIC_IS_NODCE(rdev)) {
4143                 /* we need to own VRAM, so turn off the VGA renderer here
4144                  * to stop it overwriting our objects */
4145                 rv515_vga_render_disable(rdev);
4146         }
4147 }
4148
4149 void si_vram_gtt_location(struct radeon_device *rdev,
4150                           struct radeon_mc *mc)
4151 {
4152         if (mc->mc_vram_size > 0xFFC0000000ULL) {
4153                 /* leave room for at least 1024M GTT */
4154                 dev_warn(rdev->dev, "limiting VRAM\n");
4155                 mc->real_vram_size = 0xFFC0000000ULL;
4156                 mc->mc_vram_size = 0xFFC0000000ULL;
4157         }
4158         radeon_vram_location(rdev, &rdev->mc, 0);
4159         rdev->mc.gtt_base_align = 0;
4160         radeon_gtt_location(rdev, mc);
4161 }
4162
4163 static int si_mc_init(struct radeon_device *rdev)
4164 {
4165         u32 tmp;
4166         int chansize, numchan;
4167
4168         /* Get VRAM information */
4169         rdev->mc.vram_is_ddr = true;
4170         tmp = RREG32(MC_ARB_RAMCFG);
4171         if (tmp & CHANSIZE_OVERRIDE) {
4172                 chansize = 16;
4173         } else if (tmp & CHANSIZE_MASK) {
4174                 chansize = 64;
4175         } else {
4176                 chansize = 32;
4177         }
4178         tmp = RREG32(MC_SHARED_CHMAP);
4179         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4180         case 0:
4181         default:
4182                 numchan = 1;
4183                 break;
4184         case 1:
4185                 numchan = 2;
4186                 break;
4187         case 2:
4188                 numchan = 4;
4189                 break;
4190         case 3:
4191                 numchan = 8;
4192                 break;
4193         case 4:
4194                 numchan = 3;
4195                 break;
4196         case 5:
4197                 numchan = 6;
4198                 break;
4199         case 6:
4200                 numchan = 10;
4201                 break;
4202         case 7:
4203                 numchan = 12;
4204                 break;
4205         case 8:
4206                 numchan = 16;
4207                 break;
4208         }
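             /* effective memory bus width in bits = channels * bits per channel */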
4209         rdev->mc.vram_width = numchan * chansize;
4210         /* Could the aperture size report 0? */
4211         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4212         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4213         /* size in MB on si */
4214         tmp = RREG32(CONFIG_MEMSIZE);
4215         /* some boards may have garbage in the upper 16 bits */
4216         if (tmp & 0xffff0000) {
4217                 DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4218                 if (tmp & 0xffff)
4219                         tmp &= 0xffff;
4220         }
4221         rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4222         rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4223         rdev->mc.visible_vram_size = rdev->mc.aper_size;
4224         si_vram_gtt_location(rdev, &rdev->mc);
4225         radeon_update_bandwidth_info(rdev);
4226
4227         return 0;
4228 }
4229
4230 /*
4231  * GART
4232  */
4233 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4234 {
4235         /* flush hdp cache */
4236         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4237
4238         /* bits 0-15 are the VM contexts 0-15 */
4239         WREG32(VM_INVALIDATE_REQUEST, 1);
4240 }
4241
4242 static int si_pcie_gart_enable(struct radeon_device *rdev)
4243 {
4244         int r, i;
4245
4246         if (rdev->gart.robj == NULL) {
4247                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4248                 return -EINVAL;
4249         }
4250         r = radeon_gart_table_vram_pin(rdev);
4251         if (r)
4252                 return r;
4253         /* Setup TLB control */
4254         WREG32(MC_VM_MX_L1_TLB_CNTL,
4255                (0xA << 7) |
4256                ENABLE_L1_TLB |
4257                ENABLE_L1_FRAGMENT_PROCESSING |
4258                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4259                ENABLE_ADVANCED_DRIVER_MODEL |
4260                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4261         /* Setup L2 cache */
4262         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4263                ENABLE_L2_FRAGMENT_PROCESSING |
4264                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4265                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4266                EFFECTIVE_L2_QUEUE_SIZE(7) |
4267                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4268         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4269         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4270                BANK_SELECT(4) |
4271                L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4272         /* setup context0 */
4273         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4274         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4275         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4276         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4277                         (u32)(rdev->dummy_page.addr >> 12));
4278         WREG32(VM_CONTEXT0_CNTL2, 0);
4279         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4280                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4281
4282         WREG32(0x15D4, 0);
4283         WREG32(0x15D8, 0);
4284         WREG32(0x15DC, 0);
4285
4286         /* empty context1-15 */
4287         /* set vm size, must be a multiple of 4 */
4288         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4289         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4290         /* Assign the pt base to something valid for now; the pts used for
4291          * the VMs are determined by the application and set up and assigned
4292          * on the fly in the vm part of radeon_gart.c
4293          */
4294         for (i = 1; i < 16; i++) {
4295                 if (i < 8)
4296                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4297                                rdev->vm_manager.saved_table_addr[i]);
4298                 else
4299                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4300                                rdev->vm_manager.saved_table_addr[i]);
4301         }
4302
4303         /* enable context1-15 */
4304         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4305                (u32)(rdev->dummy_page.addr >> 12));
4306         WREG32(VM_CONTEXT1_CNTL2, 4);
4307         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4308                                 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4309                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4310                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4311                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4312                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4313                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4314                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4315                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4316                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4317                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4318                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4319                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4320                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4321
4322         si_pcie_gart_tlb_flush(rdev);
4323         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4324                  (unsigned)(rdev->mc.gtt_size >> 20),
4325                  (unsigned long long)rdev->gart.table_addr);
4326         rdev->gart.ready = true;
4327         return 0;
4328 }
4329
4330 static void si_pcie_gart_disable(struct radeon_device *rdev)
4331 {
4332         unsigned i;
4333
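             /* Save the per-context page table base addresses so they can be
              * restored when the GART is re-enabled.
              */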
4334         for (i = 1; i < 16; ++i) {
4335                 uint32_t reg;
4336                 if (i < 8)
4337                         reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4338                 else
4339                         reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4340                 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4341         }
4342
4343         /* Disable all tables */
4344         WREG32(VM_CONTEXT0_CNTL, 0);
4345         WREG32(VM_CONTEXT1_CNTL, 0);
4346         /* Setup TLB control */
4347         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4348                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4349         /* Setup L2 cache */
4350         WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4351                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4352                EFFECTIVE_L2_QUEUE_SIZE(7) |
4353                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4354         WREG32(VM_L2_CNTL2, 0);
4355         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4356                L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4357         radeon_gart_table_vram_unpin(rdev);
4358 }
4359
4360 static void si_pcie_gart_fini(struct radeon_device *rdev)
4361 {
4362         si_pcie_gart_disable(rdev);
4363         radeon_gart_table_vram_free(rdev);
4364         radeon_gart_fini(rdev);
4365 }
4366
4367 /* vm parser */
4368 static bool si_vm_reg_valid(u32 reg)
4369 {
4370         /* context regs are fine */
4371         if (reg >= 0x28000)
4372                 return true;
4373
4374         /* check config regs */
4375         switch (reg) {
4376         case GRBM_GFX_INDEX:
4377         case CP_STRMOUT_CNTL:
4378         case VGT_VTX_VECT_EJECT_REG:
4379         case VGT_CACHE_INVALIDATION:
4380         case VGT_ESGS_RING_SIZE:
4381         case VGT_GSVS_RING_SIZE:
4382         case VGT_GS_VERTEX_REUSE:
4383         case VGT_PRIMITIVE_TYPE:
4384         case VGT_INDEX_TYPE:
4385         case VGT_NUM_INDICES:
4386         case VGT_NUM_INSTANCES:
4387         case VGT_TF_RING_SIZE:
4388         case VGT_HS_OFFCHIP_PARAM:
4389         case VGT_TF_MEMORY_BASE:
4390         case PA_CL_ENHANCE:
4391         case PA_SU_LINE_STIPPLE_VALUE:
4392         case PA_SC_LINE_STIPPLE_STATE:
4393         case PA_SC_ENHANCE:
4394         case SQC_CACHES:
4395         case SPI_STATIC_THREAD_MGMT_1:
4396         case SPI_STATIC_THREAD_MGMT_2:
4397         case SPI_STATIC_THREAD_MGMT_3:
4398         case SPI_PS_MAX_WAVE_ID:
4399         case SPI_CONFIG_CNTL:
4400         case SPI_CONFIG_CNTL_1:
4401         case TA_CNTL_AUX:
4402                 return true;
4403         default:
4404                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4405                 return false;
4406         }
4407 }
4408
4409 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4410                                   u32 *ib, struct radeon_cs_packet *pkt)
4411 {
4412         switch (pkt->opcode) {
4413         case PACKET3_NOP:
4414         case PACKET3_SET_BASE:
4415         case PACKET3_SET_CE_DE_COUNTERS:
4416         case PACKET3_LOAD_CONST_RAM:
4417         case PACKET3_WRITE_CONST_RAM:
4418         case PACKET3_WRITE_CONST_RAM_OFFSET:
4419         case PACKET3_DUMP_CONST_RAM:
4420         case PACKET3_INCREMENT_CE_COUNTER:
4421         case PACKET3_WAIT_ON_DE_COUNTER:
4422         case PACKET3_CE_WRITE:
4423                 break;
4424         default:
4425                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4426                 return -EINVAL;
4427         }
4428         return 0;
4429 }
4430
4431 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4432 {
4433         u32 start_reg, reg, i;
4434         u32 command = ib[idx + 4];
4435         u32 info = ib[idx + 1];
4436         u32 idx_value = ib[idx];
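             /* Dwords as consumed by this check: ib[idx] is the source
              * address/register, ib[idx + 1] carries the src/dst address space
              * selects, ib[idx + 2] is the destination address/register and
              * ib[idx + 4] is the COMMAND word with the SAS/DAS/SAIC/DAIC flags
              * and the byte count.
              */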
4437         if (command & PACKET3_CP_DMA_CMD_SAS) {
4438                 /* src address space is register */
4439                 if (((info & 0x60000000) >> 29) == 0) {
4440                         start_reg = idx_value << 2;
4441                         if (command & PACKET3_CP_DMA_CMD_SAIC) {
4442                                 reg = start_reg;
4443                                 if (!si_vm_reg_valid(reg)) {
4444                                         DRM_ERROR("CP DMA Bad SRC register\n");
4445                                         return -EINVAL;
4446                                 }
4447                         } else {
4448                                 for (i = 0; i < (command & 0x1fffff); i++) {
4449                                         reg = start_reg + (4 * i);
4450                                         if (!si_vm_reg_valid(reg)) {
4451                                                 DRM_ERROR("CP DMA Bad SRC register\n");
4452                                                 return -EINVAL;
4453                                         }
4454                                 }
4455                         }
4456                 }
4457         }
4458         if (command & PACKET3_CP_DMA_CMD_DAS) {
4459                 /* dst address space is register */
4460                 if (((info & 0x00300000) >> 20) == 0) {
4461                         start_reg = ib[idx + 2];
4462                         if (command & PACKET3_CP_DMA_CMD_DAIC) {
4463                                 reg = start_reg;
4464                                 if (!si_vm_reg_valid(reg)) {
4465                                         DRM_ERROR("CP DMA Bad DST register\n");
4466                                         return -EINVAL;
4467                                 }
4468                         } else {
4469                                 for (i = 0; i < (command & 0x1fffff); i++) {
4470                                         reg = start_reg + (4 * i);
4471                                         if (!si_vm_reg_valid(reg)) {
4472                                                 DRM_ERROR("CP DMA Bad DST register\n");
4473                                                 return -EINVAL;
4474                                         }
4475                                 }
4476                         }
4477                 }
4478         }
4479         return 0;
4480 }
4481
4482 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4483                                    u32 *ib, struct radeon_cs_packet *pkt)
4484 {
4485         int r;
4486         u32 idx = pkt->idx + 1;
4487         u32 idx_value = ib[idx];
4488         u32 start_reg, end_reg, reg, i;
4489
4490         switch (pkt->opcode) {
4491         case PACKET3_NOP:
4492         case PACKET3_SET_BASE:
4493         case PACKET3_CLEAR_STATE:
4494         case PACKET3_INDEX_BUFFER_SIZE:
4495         case PACKET3_DISPATCH_DIRECT:
4496         case PACKET3_DISPATCH_INDIRECT:
4497         case PACKET3_ALLOC_GDS:
4498         case PACKET3_WRITE_GDS_RAM:
4499         case PACKET3_ATOMIC_GDS:
4500         case PACKET3_ATOMIC:
4501         case PACKET3_OCCLUSION_QUERY:
4502         case PACKET3_SET_PREDICATION:
4503         case PACKET3_COND_EXEC:
4504         case PACKET3_PRED_EXEC:
4505         case PACKET3_DRAW_INDIRECT:
4506         case PACKET3_DRAW_INDEX_INDIRECT:
4507         case PACKET3_INDEX_BASE:
4508         case PACKET3_DRAW_INDEX_2:
4509         case PACKET3_CONTEXT_CONTROL:
4510         case PACKET3_INDEX_TYPE:
4511         case PACKET3_DRAW_INDIRECT_MULTI:
4512         case PACKET3_DRAW_INDEX_AUTO:
4513         case PACKET3_DRAW_INDEX_IMMD:
4514         case PACKET3_NUM_INSTANCES:
4515         case PACKET3_DRAW_INDEX_MULTI_AUTO:
4516         case PACKET3_STRMOUT_BUFFER_UPDATE:
4517         case PACKET3_DRAW_INDEX_OFFSET_2:
4518         case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4519         case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4520         case PACKET3_MPEG_INDEX:
4521         case PACKET3_WAIT_REG_MEM:
4522         case PACKET3_MEM_WRITE:
4523         case PACKET3_PFP_SYNC_ME:
4524         case PACKET3_SURFACE_SYNC:
4525         case PACKET3_EVENT_WRITE:
4526         case PACKET3_EVENT_WRITE_EOP:
4527         case PACKET3_EVENT_WRITE_EOS:
4528         case PACKET3_SET_CONTEXT_REG:
4529         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4530         case PACKET3_SET_SH_REG:
4531         case PACKET3_SET_SH_REG_OFFSET:
4532         case PACKET3_INCREMENT_DE_COUNTER:
4533         case PACKET3_WAIT_ON_CE_COUNTER:
4534         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4535         case PACKET3_ME_WRITE:
4536                 break;
4537         case PACKET3_COPY_DATA:
4538                 if ((idx_value & 0xf00) == 0) {
4539                         reg = ib[idx + 3] * 4;
4540                         if (!si_vm_reg_valid(reg))
4541                                 return -EINVAL;
4542                 }
4543                 break;
4544         case PACKET3_WRITE_DATA:
4545                 if ((idx_value & 0xf00) == 0) {
4546                         start_reg = ib[idx + 1] * 4;
4547                         if (idx_value & 0x10000) {
4548                                 if (!si_vm_reg_valid(start_reg))
4549                                         return -EINVAL;
4550                         } else {
4551                                 for (i = 0; i < (pkt->count - 2); i++) {
4552                                         reg = start_reg + (4 * i);
4553                                         if (!si_vm_reg_valid(reg))
4554                                                 return -EINVAL;
4555                                 }
4556                         }
4557                 }
4558                 break;
4559         case PACKET3_COND_WRITE:
4560                 if (idx_value & 0x100) {
4561                         reg = ib[idx + 5] * 4;
4562                         if (!si_vm_reg_valid(reg))
4563                                 return -EINVAL;
4564                 }
4565                 break;
4566         case PACKET3_COPY_DW:
4567                 if (idx_value & 0x2) {
4568                         reg = ib[idx + 3] * 4;
4569                         if (!si_vm_reg_valid(reg))
4570                                 return -EINVAL;
4571                 }
4572                 break;
4573         case PACKET3_SET_CONFIG_REG:
4574                 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4575                 end_reg = 4 * pkt->count + start_reg - 4;
4576                 if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4577                     (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4578                     (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4579                         DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4580                         return -EINVAL;
4581                 }
4582                 for (i = 0; i < pkt->count; i++) {
4583                         reg = start_reg + (4 * i);
4584                         if (!si_vm_reg_valid(reg))
4585                                 return -EINVAL;
4586                 }
4587                 break;
4588         case PACKET3_CP_DMA:
4589                 r = si_vm_packet3_cp_dma_check(ib, idx);
4590                 if (r)
4591                         return r;
4592                 break;
4593         default:
4594                 DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4595                 return -EINVAL;
4596         }
4597         return 0;
4598 }
4599
4600 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4601                                        u32 *ib, struct radeon_cs_packet *pkt)
4602 {
4603         int r;
4604         u32 idx = pkt->idx + 1;
4605         u32 idx_value = ib[idx];
4606         u32 start_reg, reg, i;
4607
4608         switch (pkt->opcode) {
4609         case PACKET3_NOP:
4610         case PACKET3_SET_BASE:
4611         case PACKET3_CLEAR_STATE:
4612         case PACKET3_DISPATCH_DIRECT:
4613         case PACKET3_DISPATCH_INDIRECT:
4614         case PACKET3_ALLOC_GDS:
4615         case PACKET3_WRITE_GDS_RAM:
4616         case PACKET3_ATOMIC_GDS:
4617         case PACKET3_ATOMIC:
4618         case PACKET3_OCCLUSION_QUERY:
4619         case PACKET3_SET_PREDICATION:
4620         case PACKET3_COND_EXEC:
4621         case PACKET3_PRED_EXEC:
4622         case PACKET3_CONTEXT_CONTROL:
4623         case PACKET3_STRMOUT_BUFFER_UPDATE:
4624         case PACKET3_WAIT_REG_MEM:
4625         case PACKET3_MEM_WRITE:
4626         case PACKET3_PFP_SYNC_ME:
4627         case PACKET3_SURFACE_SYNC:
4628         case PACKET3_EVENT_WRITE:
4629         case PACKET3_EVENT_WRITE_EOP:
4630         case PACKET3_EVENT_WRITE_EOS:
4631         case PACKET3_SET_CONTEXT_REG:
4632         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4633         case PACKET3_SET_SH_REG:
4634         case PACKET3_SET_SH_REG_OFFSET:
4635         case PACKET3_INCREMENT_DE_COUNTER:
4636         case PACKET3_WAIT_ON_CE_COUNTER:
4637         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4638         case PACKET3_ME_WRITE:
4639                 break;
4640         case PACKET3_COPY_DATA:
4641                 if ((idx_value & 0xf00) == 0) {
4642                         reg = ib[idx + 3] * 4;
4643                         if (!si_vm_reg_valid(reg))
4644                                 return -EINVAL;
4645                 }
4646                 break;
4647         case PACKET3_WRITE_DATA:
4648                 if ((idx_value & 0xf00) == 0) {
4649                         start_reg = ib[idx + 1] * 4;
4650                         if (idx_value & 0x10000) {
4651                                 if (!si_vm_reg_valid(start_reg))
4652                                         return -EINVAL;
4653                         } else {
4654                                 for (i = 0; i < (pkt->count - 2); i++) {
4655                                         reg = start_reg + (4 * i);
4656                                         if (!si_vm_reg_valid(reg))
4657                                                 return -EINVAL;
4658                                 }
4659                         }
4660                 }
4661                 break;
4662         case PACKET3_COND_WRITE:
4663                 if (idx_value & 0x100) {
4664                         reg = ib[idx + 5] * 4;
4665                         if (!si_vm_reg_valid(reg))
4666                                 return -EINVAL;
4667                 }
4668                 break;
4669         case PACKET3_COPY_DW:
4670                 if (idx_value & 0x2) {
4671                         reg = ib[idx + 3] * 4;
4672                         if (!si_vm_reg_valid(reg))
4673                                 return -EINVAL;
4674                 }
4675                 break;
4676         case PACKET3_CP_DMA:
4677                 r = si_vm_packet3_cp_dma_check(ib, idx);
4678                 if (r)
4679                         return r;
4680                 break;
4681         default:
4682                 DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4683                 return -EINVAL;
4684         }
4685         return 0;
4686 }
4687
4688 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4689 {
4690         int ret = 0;
4691         u32 idx = 0, i;
4692         struct radeon_cs_packet pkt;
4693
4694         do {
4695                 pkt.idx = idx;
4696                 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4697                 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4698                 pkt.one_reg_wr = 0;
4699                 switch (pkt.type) {
4700                 case RADEON_PACKET_TYPE0:
4701                         dev_err(rdev->dev, "Packet0 not allowed!\n");
4702                         ret = -EINVAL;
4703                         break;
4704                 case RADEON_PACKET_TYPE2:
4705                         idx += 1;
4706                         break;
4707                 case RADEON_PACKET_TYPE3:
4708                         pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4709                         if (ib->is_const_ib)
4710                                 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4711                         else {
4712                                 switch (ib->ring) {
4713                                 case RADEON_RING_TYPE_GFX_INDEX:
4714                                         ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4715                                         break;
4716                                 case CAYMAN_RING_TYPE_CP1_INDEX:
4717                                 case CAYMAN_RING_TYPE_CP2_INDEX:
4718                                         ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4719                                         break;
4720                                 default:
4721                                         dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4722                                         ret = -EINVAL;
4723                                         break;
4724                                 }
4725                         }
4726                         idx += pkt.count + 2;
4727                         break;
4728                 default:
4729                         dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4730                         ret = -EINVAL;
4731                         break;
4732                 }
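                     /* On any packet error, dump the entire IB and mark the
                      * offending dword so the bad command stream can be
                      * inspected from the log.
                      */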
4733                 if (ret) {
4734                         for (i = 0; i < ib->length_dw; i++) {
4735                                 if (i == idx)
4736                                         printk("\t0x%08x <---\n", ib->ptr[i]);
4737                                 else
4738                                         printk("\t0x%08x\n", ib->ptr[i]);
4739                         }
4740                         break;
4741                 }
4742         } while (idx < ib->length_dw);
4743
4744         return ret;
4745 }
4746
4747 /*
4748  * vm
4749  */
4750 int si_vm_init(struct radeon_device *rdev)
4751 {
4752         /* number of VMs */
4753         rdev->vm_manager.nvm = 16;
4754         /* base offset of vram pages */
4755         rdev->vm_manager.vram_base_offset = 0;
4756
4757         return 0;
4758 }
4759
4760 void si_vm_fini(struct radeon_device *rdev)
4761 {
4762 }
4763
4764 /**
4765  * si_vm_decode_fault - print human readable fault info
4766  *
4767  * @rdev: radeon_device pointer
4768  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4769  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4770  *
4771  * Print human readable fault information (SI).
4772  */
4773 static void si_vm_decode_fault(struct radeon_device *rdev,
4774                                u32 status, u32 addr)
4775 {
4776         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4777         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4778         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4779         char *block;
4780
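             /* Map the raw memory client id to a human readable block name;
              * the id assignments differ between Tahiti and the other SI parts.
              */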
4781         if (rdev->family == CHIP_TAHITI) {
4782                 switch (mc_id) {
4783                 case 160:
4784                 case 144:
4785                 case 96:
4786                 case 80:
4787                 case 224:
4788                 case 208:
4789                 case 32:
4790                 case 16:
4791                         block = "CB";
4792                         break;
4793                 case 161:
4794                 case 145:
4795                 case 97:
4796                 case 81:
4797                 case 225:
4798                 case 209:
4799                 case 33:
4800                 case 17:
4801                         block = "CB_FMASK";
4802                         break;
4803                 case 162:
4804                 case 146:
4805                 case 98:
4806                 case 82:
4807                 case 226:
4808                 case 210:
4809                 case 34:
4810                 case 18:
4811                         block = "CB_CMASK";
4812                         break;
4813                 case 163:
4814                 case 147:
4815                 case 99:
4816                 case 83:
4817                 case 227:
4818                 case 211:
4819                 case 35:
4820                 case 19:
4821                         block = "CB_IMMED";
4822                         break;
4823                 case 164:
4824                 case 148:
4825                 case 100:
4826                 case 84:
4827                 case 228:
4828                 case 212:
4829                 case 36:
4830                 case 20:
4831                         block = "DB";
4832                         break;
4833                 case 165:
4834                 case 149:
4835                 case 101:
4836                 case 85:
4837                 case 229:
4838                 case 213:
4839                 case 37:
4840                 case 21:
4841                         block = "DB_HTILE";
4842                         break;
4843                 case 167:
4844                 case 151:
4845                 case 103:
4846                 case 87:
4847                 case 231:
4848                 case 215:
4849                 case 39:
4850                 case 23:
4851                         block = "DB_STEN";
4852                         break;
4853                 case 72:
4854                 case 68:
4855                 case 64:
4856                 case 8:
4857                 case 4:
4858                 case 0:
4859                 case 136:
4860                 case 132:
4861                 case 128:
4862                 case 200:
4863                 case 196:
4864                 case 192:
4865                         block = "TC";
4866                         break;
4867                 case 112:
4868                 case 48:
4869                         block = "CP";
4870                         break;
4871                 case 49:
4872                 case 177:
4873                 case 50:
4874                 case 178:
4875                         block = "SH";
4876                         break;
4877                 case 53:
4878                 case 190:
4879                         block = "VGT";
4880                         break;
4881                 case 117:
4882                         block = "IH";
4883                         break;
4884                 case 51:
4885                 case 115:
4886                         block = "RLC";
4887                         break;
4888                 case 119:
4889                 case 183:
4890                         block = "DMA0";
4891                         break;
4892                 case 61:
4893                         block = "DMA1";
4894                         break;
4895                 case 248:
4896                 case 120:
4897                         block = "HDP";
4898                         break;
4899                 default:
4900                         block = "unknown";
4901                         break;
4902                 }
4903         } else {
4904                 switch (mc_id) {
4905                 case 32:
4906                 case 16:
4907                 case 96:
4908                 case 80:
4909                 case 160:
4910                 case 144:
4911                 case 224:
4912                 case 208:
4913                         block = "CB";
4914                         break;
4915                 case 33:
4916                 case 17:
4917                 case 97:
4918                 case 81:
4919                 case 161:
4920                 case 145:
4921                 case 225:
4922                 case 209:
4923                         block = "CB_FMASK";
4924                         break;
4925                 case 34:
4926                 case 18:
4927                 case 98:
4928                 case 82:
4929                 case 162:
4930                 case 146:
4931                 case 226:
4932                 case 210:
4933                         block = "CB_CMASK";
4934                         break;
4935                 case 35:
4936                 case 19:
4937                 case 99:
4938                 case 83:
4939                 case 163:
4940                 case 147:
4941                 case 227:
4942                 case 211:
4943                         block = "CB_IMMED";
4944                         break;
4945                 case 36:
4946                 case 20:
4947                 case 100:
4948                 case 84:
4949                 case 164:
4950                 case 148:
4951                 case 228:
4952                 case 212:
4953                         block = "DB";
4954                         break;
4955                 case 37:
4956                 case 21:
4957                 case 101:
4958                 case 85:
4959                 case 165:
4960                 case 149:
4961                 case 229:
4962                 case 213:
4963                         block = "DB_HTILE";
4964                         break;
4965                 case 39:
4966                 case 23:
4967                 case 103:
4968                 case 87:
4969                 case 167:
4970                 case 151:
4971                 case 231:
4972                 case 215:
4973                         block = "DB_STEN";
4974                         break;
4975                 case 72:
4976                 case 68:
4977                 case 8:
4978                 case 4:
4979                 case 136:
4980                 case 132:
4981                 case 200:
4982                 case 196:
4983                         block = "TC";
4984                         break;
4985                 case 112:
4986                 case 48:
4987                         block = "CP";
4988                         break;
4989                 case 49:
4990                 case 177:
4991                 case 50:
4992                 case 178:
4993                         block = "SH";
4994                         break;
4995                 case 53:
4996                         block = "VGT";
4997                         break;
4998                 case 117:
4999                         block = "IH";
5000                         break;
5001                 case 51:
5002                 case 115:
5003                         block = "RLC";
5004                         break;
5005                 case 119:
5006                 case 183:
5007                         block = "DMA0";
5008                         break;
5009                 case 61:
5010                         block = "DMA1";
5011                         break;
5012                 case 248:
5013                 case 120:
5014                         block = "HDP";
5015                         break;
5016                 default:
5017                         block = "unknown";
5018                         break;
5019                 }
5020         }
5021
5022         printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5023                protections, vmid, addr,
5024                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5025                block, mc_id);
5026 }
5027
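/**
 * si_vm_flush - flush the TLB of a VM context via the GFX ring
 * @rdev: radeon_device pointer
 * @ring: ring to emit the flush on
 * @vm_id: VM context id to invalidate (0-15)
 * @pd_addr: GPU address of the new page directory
 *
 * Emits WRITE_DATA packets that update the page table base register for
 * @vm_id, flush the HDP cache and request a VM invalidate, then waits for
 * the invalidate to complete and syncs PFP with ME.
 */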
5028 void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5029                  unsigned vm_id, uint64_t pd_addr)
5030 {
5031         /* write new base address */
5032         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5033         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5034                                  WRITE_DATA_DST_SEL(0)));
5035
5036         if (vm_id < 8) {
5037                 radeon_ring_write(ring,
5038                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5039         } else {
5040                 radeon_ring_write(ring,
5041                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5042         }
5043         radeon_ring_write(ring, 0);
5044         radeon_ring_write(ring, pd_addr >> 12);
5045
5046         /* flush hdp cache */
5047         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5048         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5049                                  WRITE_DATA_DST_SEL(0)));
5050         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5051         radeon_ring_write(ring, 0);
5052         radeon_ring_write(ring, 0x1);
5053
5054         /* bits 0-15 are the VM contexts 0-15 */
5055         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5056         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5057                                  WRITE_DATA_DST_SEL(0)));
5058         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5059         radeon_ring_write(ring, 0);
5060         radeon_ring_write(ring, 1 << vm_id);
5061
5062         /* wait for the invalidate to complete */
5063         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5064         radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
5065                                  WAIT_REG_MEM_ENGINE(0))); /* me */
5066         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5067         radeon_ring_write(ring, 0);
5068         radeon_ring_write(ring, 0); /* ref */
5069         radeon_ring_write(ring, 0); /* mask */
5070         radeon_ring_write(ring, 0x20); /* poll interval */
5071
5072         /* sync PFP to ME, otherwise we might get invalid PFP reads */
5073         radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5074         radeon_ring_write(ring, 0x0);
5075 }
5076
5077 /*
5078  *  Power and clock gating
5079  */
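/* Poll RLC_SERDES_MASTER_BUSY_0/1 until the RLC serdes lines go idle or
 * rdev->usec_timeout microseconds elapse.
 */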
5080 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5081 {
5082         int i;
5083
5084         for (i = 0; i < rdev->usec_timeout; i++) {
5085                 if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5086                         break;
5087                 udelay(1);
5088         }
5089
5090         for (i = 0; i < rdev->usec_timeout; i++) {
5091                 if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5092                         break;
5093                 udelay(1);
5094         }
5095 }
5096
5097 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5098                                          bool enable)
5099 {
5100         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5101         u32 mask;
5102         int i;
5103
5104         if (enable)
5105                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5106         else
5107                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5108         WREG32(CP_INT_CNTL_RING0, tmp);
5109
5110         if (!enable) {
5111                 /* read a gfx register */
5112                 tmp = RREG32(DB_DEPTH_INFO);
5113
5114                 mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5115                 for (i = 0; i < rdev->usec_timeout; i++) {
5116                         if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5117                                 break;
5118                         udelay(1);
5119                 }
5120         }
5121 }
5122
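/* Program UVD dynamic clock mode: DCM is always enabled; in sw_mode the
 * per-block gate bits in UVD_CGC_CTRL are cleared and gating is driven
 * through UVD_CGC_CTRL2, otherwise those bits are set and UVD_CGC_CTRL2
 * is cleared.
 */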
5123 static void si_set_uvd_dcm(struct radeon_device *rdev,
5124                            bool sw_mode)
5125 {
5126         u32 tmp, tmp2;
5127
5128         tmp = RREG32(UVD_CGC_CTRL);
5129         tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5130         tmp |= DCM | CG_DT(1) | CLK_OD(4);
5131
5132         if (sw_mode) {
5133                 tmp &= ~0x7ffff800;
5134                 tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5135         } else {
5136                 tmp |= 0x7ffff800;
5137                 tmp2 = 0;
5138         }
5139
5140         WREG32(UVD_CGC_CTRL, tmp);
5141         WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5142 }
5143
5144 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5145 {
5146         bool hw_mode = true;
5147
5148         if (hw_mode) {
5149                 si_set_uvd_dcm(rdev, false);
5150         } else {
5151                 u32 tmp = RREG32(UVD_CGC_CTRL);
5152                 tmp &= ~DCM;
5153                 WREG32(UVD_CGC_CTRL, tmp);
5154         }
5155 }
5156
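/* Temporarily disable the RLC (waiting for its serdes to go idle) and
 * return the previous RLC_CNTL value so the caller can restore it later
 * with si_update_rlc().
 */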
5157 static u32 si_halt_rlc(struct radeon_device *rdev)
5158 {
5159         u32 data, orig;
5160
5161         orig = data = RREG32(RLC_CNTL);
5162
5163         if (data & RLC_ENABLE) {
5164                 data &= ~RLC_ENABLE;
5165                 WREG32(RLC_CNTL, data);
5166
5167                 si_wait_for_rlc_serdes(rdev);
5168         }
5169
5170         return orig;
5171 }
5172
5173 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5174 {
5175         u32 tmp;
5176
5177         tmp = RREG32(RLC_CNTL);
5178         if (tmp != rlc)
5179                 WREG32(RLC_CNTL, rlc);
5180 }
5181
5182 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5183 {
5184         u32 data, orig;
5185
5186         orig = data = RREG32(DMA_PG);
5187         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5188                 data |= PG_CNTL_ENABLE;
5189         else
5190                 data &= ~PG_CNTL_ENABLE;
5191         if (orig != data)
5192                 WREG32(DMA_PG, data);
5193 }
5194
5195 static void si_init_dma_pg(struct radeon_device *rdev)
5196 {
5197         u32 tmp;
5198
5199         WREG32(DMA_PGFSM_WRITE,  0x00002000);
5200         WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5201
5202         for (tmp = 0; tmp < 5; tmp++)
5203                 WREG32(DMA_PGFSM_WRITE, 0);
5204 }
5205
5206 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5207                                bool enable)
5208 {
5209         u32 tmp;
5210
5211         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5212                 tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5213                 WREG32(RLC_TTOP_D, tmp);
5214
5215                 tmp = RREG32(RLC_PG_CNTL);
5216                 tmp |= GFX_PG_ENABLE;
5217                 WREG32(RLC_PG_CNTL, tmp);
5218
5219                 tmp = RREG32(RLC_AUTO_PG_CTRL);
5220                 tmp |= AUTO_PG_EN;
5221                 WREG32(RLC_AUTO_PG_CTRL, tmp);
5222         } else {
5223                 tmp = RREG32(RLC_AUTO_PG_CTRL);
5224                 tmp &= ~AUTO_PG_EN;
5225                 WREG32(RLC_AUTO_PG_CTRL, tmp);
5226
5227                 tmp = RREG32(DB_RENDER_CONTROL);
5228         }
5229 }
5230
5231 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5232 {
5233         u32 tmp;
5234
5235         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5236
5237         tmp = RREG32(RLC_PG_CNTL);
5238         tmp |= GFX_PG_SRC;
5239         WREG32(RLC_PG_CNTL, tmp);
5240
5241         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5242
5243         tmp = RREG32(RLC_AUTO_PG_CTRL);
5244
5245         tmp &= ~GRBM_REG_SGIT_MASK;
5246         tmp |= GRBM_REG_SGIT(0x700);
5247         tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5248         WREG32(RLC_AUTO_PG_CTRL, tmp);
5249 }
5250
5251 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5252 {
5253         u32 mask = 0, tmp, tmp1;
5254         int i;
5255
5256         si_select_se_sh(rdev, se, sh);
5257         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5258         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5259         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5260
5261         tmp &= 0xffff0000;
5262
5263         tmp |= tmp1;
5264         tmp >>= 16;
5265
5266         for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5267                 mask <<= 1;
5268                 mask |= 1;
5269         }
5270
5271         return (~tmp) & mask;
5272 }
5273
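/* Build the "always on" CU mask used for gfx power gating: at most two
 * active CUs per shader array are kept always on, packed per SE/SH into
 * RLC_PG_AO_CU_MASK, and the total active CU count is programmed into
 * RLC_MAX_PG_CU.
 */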
5274 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5275 {
5276         u32 i, j, k, active_cu_number = 0;
5277         u32 mask, counter, cu_bitmap;
5278         u32 tmp = 0;
5279
5280         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5281                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5282                         mask = 1;
5283                         cu_bitmap = 0;
5284                         counter  = 0;
5285                         for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5286                                 if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5287                                         if (counter < 2)
5288                                                 cu_bitmap |= mask;
5289                                         counter++;
5290                                 }
5291                                 mask <<= 1;
5292                         }
5293
5294                         active_cu_number += counter;
5295                         tmp |= (cu_bitmap << (i * 16 + j * 8));
5296                 }
5297         }
5298
5299         WREG32(RLC_PG_AO_CU_MASK, tmp);
5300
5301         tmp = RREG32(RLC_MAX_PG_CU);
5302         tmp &= ~MAX_PU_CU_MASK;
5303         tmp |= MAX_PU_CU(active_cu_number);
5304         WREG32(RLC_MAX_PG_CU, tmp);
5305 }
5306
5307 static void si_enable_cgcg(struct radeon_device *rdev,
5308                            bool enable)
5309 {
5310         u32 data, orig, tmp;
5311
5312         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5313
5314         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5315                 si_enable_gui_idle_interrupt(rdev, true);
5316
5317                 WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5318
5319                 tmp = si_halt_rlc(rdev);
5320
5321                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5322                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5323                 WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5324
5325                 si_wait_for_rlc_serdes(rdev);
5326
5327                 si_update_rlc(rdev, tmp);
5328
5329                 WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5330
5331                 data |= CGCG_EN | CGLS_EN;
5332         } else {
5333                 si_enable_gui_idle_interrupt(rdev, false);
5334
5335                 RREG32(CB_CGTT_SCLK_CTRL);
5336                 RREG32(CB_CGTT_SCLK_CTRL);
5337                 RREG32(CB_CGTT_SCLK_CTRL);
5338                 RREG32(CB_CGTT_SCLK_CTRL);
5339
5340                 data &= ~(CGCG_EN | CGLS_EN);
5341         }
5342
5343         if (orig != data)
5344                 WREG32(RLC_CGCG_CGLS_CTRL, data);
5345 }
5346
5347 static void si_enable_mgcg(struct radeon_device *rdev,
5348                            bool enable)
5349 {
5350         u32 data, orig, tmp = 0;
5351
5352         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5353                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5354                 data = 0x96940200;
5355                 if (orig != data)
5356                         WREG32(CGTS_SM_CTRL_REG, data);
5357
5358                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5359                         orig = data = RREG32(CP_MEM_SLP_CNTL);
5360                         data |= CP_MEM_LS_EN;
5361                         if (orig != data)
5362                                 WREG32(CP_MEM_SLP_CNTL, data);
5363                 }
5364
5365                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5366                 data &= 0xffffffc0;
5367                 if (orig != data)
5368                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5369
5370                 tmp = si_halt_rlc(rdev);
5371
5372                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5373                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5374                 WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5375
5376                 si_update_rlc(rdev, tmp);
5377         } else {
5378                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5379                 data |= 0x00000003;
5380                 if (orig != data)
5381                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5382
5383                 data = RREG32(CP_MEM_SLP_CNTL);
5384                 if (data & CP_MEM_LS_EN) {
5385                         data &= ~CP_MEM_LS_EN;
5386                         WREG32(CP_MEM_SLP_CNTL, data);
5387                 }
5388                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5389                 data |= LS_OVERRIDE | OVERRIDE;
5390                 if (orig != data)
5391                         WREG32(CGTS_SM_CTRL_REG, data);
5392
5393                 tmp = si_halt_rlc(rdev);
5394
5395                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5396                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5397                 WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5398
5399                 si_update_rlc(rdev, tmp);
5400         }
5401 }
5402
5403 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5404                                bool enable)
5405 {
5406         u32 orig, data, tmp;
5407
5408         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5409                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5410                 tmp |= 0x3fff;
5411                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5412
5413                 orig = data = RREG32(UVD_CGC_CTRL);
5414                 data |= DCM;
5415                 if (orig != data)
5416                         WREG32(UVD_CGC_CTRL, data);
5417
5418                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5419                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5420         } else {
5421                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5422                 tmp &= ~0x3fff;
5423                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5424
5425                 orig = data = RREG32(UVD_CGC_CTRL);
5426                 data &= ~DCM;
5427                 if (orig != data)
5428                         WREG32(UVD_CGC_CTRL, data);
5429
5430                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5431                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5432         }
5433 }
5434
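/* MC/ATC/VM hub registers whose MC_LS_ENABLE/MC_CG_ENABLE bits are
 * toggled as a group by si_enable_mc_ls() and si_enable_mc_mgcg().
 */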
5435 static const u32 mc_cg_registers[] =
5436 {
5437         MC_HUB_MISC_HUB_CG,
5438         MC_HUB_MISC_SIP_CG,
5439         MC_HUB_MISC_VM_CG,
5440         MC_XPB_CLK_GAT,
5441         ATC_MISC_CG,
5442         MC_CITF_MISC_WR_CG,
5443         MC_CITF_MISC_RD_CG,
5444         MC_CITF_MISC_VM_CG,
5445         VM_L2_CG,
5446 };
5447
5448 static void si_enable_mc_ls(struct radeon_device *rdev,
5449                             bool enable)
5450 {
5451         int i;
5452         u32 orig, data;
5453
5454         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5455                 orig = data = RREG32(mc_cg_registers[i]);
5456                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5457                         data |= MC_LS_ENABLE;
5458                 else
5459                         data &= ~MC_LS_ENABLE;
5460                 if (data != orig)
5461                         WREG32(mc_cg_registers[i], data);
5462         }
5463 }
5464
5465 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5466                                bool enable)
5467 {
5468         int i;
5469         u32 orig, data;
5470
5471         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5472                 orig = data = RREG32(mc_cg_registers[i]);
5473                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5474                         data |= MC_CG_ENABLE;
5475                 else
5476                         data &= ~MC_CG_ENABLE;
5477                 if (data != orig)
5478                         WREG32(mc_cg_registers[i], data);
5479         }
5480 }
5481
5482 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5483                                bool enable)
5484 {
5485         u32 orig, data, offset;
5486         int i;
5487
5488         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5489                 for (i = 0; i < 2; i++) {
5490                         if (i == 0)
5491                                 offset = DMA0_REGISTER_OFFSET;
5492                         else
5493                                 offset = DMA1_REGISTER_OFFSET;
5494                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5495                         data &= ~MEM_POWER_OVERRIDE;
5496                         if (data != orig)
5497                                 WREG32(DMA_POWER_CNTL + offset, data);
5498                         WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5499                 }
5500         } else {
5501                 for (i = 0; i < 2; i++) {
5502                         if (i == 0)
5503                                 offset = DMA0_REGISTER_OFFSET;
5504                         else
5505                                 offset = DMA1_REGISTER_OFFSET;
5506                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5507                         data |= MEM_POWER_OVERRIDE;
5508                         if (data != orig)
5509                                 WREG32(DMA_POWER_CNTL + offset, data);
5510
5511                         orig = data = RREG32(DMA_CLK_CTRL + offset);
5512                         data = 0xff000000;
5513                         if (data != orig)
5514                                 WREG32(DMA_CLK_CTRL + offset, data);
5515                 }
5516         }
5517 }
5518
5519 static void si_enable_bif_mgls(struct radeon_device *rdev,
5520                                bool enable)
5521 {
5522         u32 orig, data;
5523
5524         orig = data = RREG32_PCIE(PCIE_CNTL2);
5525
5526         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5527                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5528                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5529         else
5530                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5531                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5532
5533         if (orig != data)
5534                 WREG32_PCIE(PCIE_CNTL2, data);
5535 }
5536
5537 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5538                                bool enable)
5539 {
5540         u32 orig, data;
5541
5542         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5543
5544         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5545                 data &= ~CLOCK_GATING_DIS;
5546         else
5547                 data |= CLOCK_GATING_DIS;
5548
5549         if (orig != data)
5550                 WREG32(HDP_HOST_PATH_CNTL, data);
5551 }
5552
5553 static void si_enable_hdp_ls(struct radeon_device *rdev,
5554                              bool enable)
5555 {
5556         u32 orig, data;
5557
5558         orig = data = RREG32(HDP_MEM_POWER_LS);
5559
5560         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5561                 data |= HDP_LS_ENABLE;
5562         else
5563                 data &= ~HDP_LS_ENABLE;
5564
5565         if (orig != data)
5566                 WREG32(HDP_MEM_POWER_LS, data);
5567 }
5568
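/* Enable or disable clock gating for the requested IP blocks.  For GFX
 * the GUI idle interrupt is masked while the state changes and, as the
 * "order matters" note below says, MGCG is enabled before CGCG and they
 * are disabled in the reverse order.
 */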
5569 static void si_update_cg(struct radeon_device *rdev,
5570                          u32 block, bool enable)
5571 {
5572         if (block & RADEON_CG_BLOCK_GFX) {
5573                 si_enable_gui_idle_interrupt(rdev, false);
5574                 /* order matters! */
5575                 if (enable) {
5576                         si_enable_mgcg(rdev, true);
5577                         si_enable_cgcg(rdev, true);
5578                 } else {
5579                         si_enable_cgcg(rdev, false);
5580                         si_enable_mgcg(rdev, false);
5581                 }
5582                 si_enable_gui_idle_interrupt(rdev, true);
5583         }
5584
5585         if (block & RADEON_CG_BLOCK_MC) {
5586                 si_enable_mc_mgcg(rdev, enable);
5587                 si_enable_mc_ls(rdev, enable);
5588         }
5589
5590         if (block & RADEON_CG_BLOCK_SDMA) {
5591                 si_enable_dma_mgcg(rdev, enable);
5592         }
5593
5594         if (block & RADEON_CG_BLOCK_BIF) {
5595                 si_enable_bif_mgls(rdev, enable);
5596         }
5597
5598         if (block & RADEON_CG_BLOCK_UVD) {
5599                 if (rdev->has_uvd) {
5600                         si_enable_uvd_mgcg(rdev, enable);
5601                 }
5602         }
5603
5604         if (block & RADEON_CG_BLOCK_HDP) {
5605                 si_enable_hdp_mgcg(rdev, enable);
5606                 si_enable_hdp_ls(rdev, enable);
5607         }
5608 }
5609
5610 static void si_init_cg(struct radeon_device *rdev)
5611 {
5612         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5613                             RADEON_CG_BLOCK_MC |
5614                             RADEON_CG_BLOCK_SDMA |
5615                             RADEON_CG_BLOCK_BIF |
5616                             RADEON_CG_BLOCK_HDP), true);
5617         if (rdev->has_uvd) {
5618                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5619                 si_init_uvd_internal_cg(rdev);
5620         }
5621 }
5622
5623 static void si_fini_cg(struct radeon_device *rdev)
5624 {
5625         if (rdev->has_uvd) {
5626                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5627         }
5628         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5629                             RADEON_CG_BLOCK_MC |
5630                             RADEON_CG_BLOCK_SDMA |
5631                             RADEON_CG_BLOCK_BIF |
5632                             RADEON_CG_BLOCK_HDP), false);
5633 }
5634
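/* Return the size, in dwords, of the clear state buffer built by
 * si_get_csb_buffer(): preamble begin/end, context control, one
 * SET_CONTEXT_REG packet per extent, PA_SC_RASTER_CONFIG and the final
 * CLEAR_STATE packet.
 */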
5635 u32 si_get_csb_size(struct radeon_device *rdev)
5636 {
5637         u32 count = 0;
5638         const struct cs_section_def *sect = NULL;
5639         const struct cs_extent_def *ext = NULL;
5640
5641         if (rdev->rlc.cs_data == NULL)
5642                 return 0;
5643
5644         /* begin clear state */
5645         count += 2;
5646         /* context control state */
5647         count += 3;
5648
5649         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5650                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5651                         if (sect->id == SECT_CONTEXT)
5652                                 count += 2 + ext->reg_count;
5653                         else
5654                                 return 0;
5655                 }
5656         }
5657         /* pa_sc_raster_config */
5658         count += 3;
5659         /* end clear state */
5660         count += 2;
5661         /* clear state */
5662         count += 2;
5663
5664         return count;
5665 }
5666
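/* Fill @buffer with the clear state commands sized above.  A minimal
 * usage sketch (illustrative only, not lifted from the driver's actual
 * RLC setup path):
 *
 *     u32 dws = si_get_csb_size(rdev);
 *     // map a GPU buffer of at least dws dwords to dst_ptr ...
 *     si_get_csb_buffer(rdev, dst_ptr);
 */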
5667 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5668 {
5669         u32 count = 0, i;
5670         const struct cs_section_def *sect = NULL;
5671         const struct cs_extent_def *ext = NULL;
5672
5673         if (rdev->rlc.cs_data == NULL)
5674                 return;
5675         if (buffer == NULL)
5676                 return;
5677
5678         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5679         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5680
5681         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5682         buffer[count++] = cpu_to_le32(0x80000000);
5683         buffer[count++] = cpu_to_le32(0x80000000);
5684
5685         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5686                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5687                         if (sect->id == SECT_CONTEXT) {
5688                                 buffer[count++] =
5689                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5690                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5691                                 for (i = 0; i < ext->reg_count; i++)
5692                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
5693                         } else {
5694                                 return;
5695                         }
5696                 }
5697         }
5698
5699         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5700         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5701         switch (rdev->family) {
5702         case CHIP_TAHITI:
5703         case CHIP_PITCAIRN:
5704                 buffer[count++] = cpu_to_le32(0x2a00126a);
5705                 break;
5706         case CHIP_VERDE:
5707                 buffer[count++] = cpu_to_le32(0x0000124a);
5708                 break;
5709         case CHIP_OLAND:
5710                 buffer[count++] = cpu_to_le32(0x00000082);
5711                 break;
5712         case CHIP_HAINAN:
5713                 buffer[count++] = cpu_to_le32(0x00000000);
5714                 break;
5715         default:
5716                 buffer[count++] = cpu_to_le32(0x00000000);
5717                 break;
5718         }
5719
5720         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5721         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5722
5723         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5724         buffer[count++] = cpu_to_le32(0);
5725 }
5726
5727 static void si_init_pg(struct radeon_device *rdev)
5728 {
5729         if (rdev->pg_flags) {
5730                 if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5731                         si_init_dma_pg(rdev);
5732                 }
5733                 si_init_ao_cu_mask(rdev);
5734                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5735                         si_init_gfx_cgpg(rdev);
5736                 } else {
5737                         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5738                         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5739                 }
5740                 si_enable_dma_pg(rdev, true);
5741                 si_enable_gfx_cgpg(rdev, true);
5742         } else {
5743                 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5744                 WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5745         }
5746 }
5747
5748 static void si_fini_pg(struct radeon_device *rdev)
5749 {
5750         if (rdev->pg_flags) {
5751                 si_enable_dma_pg(rdev, false);
5752                 si_enable_gfx_cgpg(rdev, false);
5753         }
5754 }
5755
5756 /*
5757  * RLC
5758  */
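/* Pulse SOFT_RESET_RLC in GRBM_SOFT_RESET (held for 50us, then 50us to
 * settle) to put the RLC back into a known state.
 */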
5759 void si_rlc_reset(struct radeon_device *rdev)
5760 {
5761         u32 tmp = RREG32(GRBM_SOFT_RESET);
5762
5763         tmp |= SOFT_RESET_RLC;
5764         WREG32(GRBM_SOFT_RESET, tmp);
5765         udelay(50);
5766         tmp &= ~SOFT_RESET_RLC;
5767         WREG32(GRBM_SOFT_RESET, tmp);
5768         udelay(50);
5769 }
5770
5771 static void si_rlc_stop(struct radeon_device *rdev)
5772 {
5773         WREG32(RLC_CNTL, 0);
5774
5775         si_enable_gui_idle_interrupt(rdev, false);
5776
5777         si_wait_for_rlc_serdes(rdev);
5778 }
5779
5780 static void si_rlc_start(struct radeon_device *rdev)
5781 {
5782         WREG32(RLC_CNTL, RLC_ENABLE);
5783
5784         si_enable_gui_idle_interrupt(rdev, true);
5785
5786         udelay(50);
5787 }
5788
5789 static bool si_lbpw_supported(struct radeon_device *rdev)
5790 {
5791         u32 tmp;
5792
5793         /* Enable LBPW only for DDR3 */
5794         tmp = RREG32(MC_SEQ_MISC0);
5795         if ((tmp & 0xF0000000) == 0xB0000000)
5796                 return true;
5797         return false;
5798 }
5799
5800 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5801 {
5802         u32 tmp;
5803
5804         tmp = RREG32(RLC_LB_CNTL);
5805         if (enable)
5806                 tmp |= LOAD_BALANCE_ENABLE;
5807         else
5808                 tmp &= ~LOAD_BALANCE_ENABLE;
5809         WREG32(RLC_LB_CNTL, tmp);
5810
5811         if (!enable) {
5812                 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5813                 WREG32(SPI_LB_CU_MASK, 0x00ff);
5814         }
5815 }
5816
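/* Bring the RLC back up: stop and reset it, (re)initialize power and
 * clock gating, program the load balancing registers, upload the RLC
 * microcode (new header-based firmware or the legacy big-endian blob)
 * and finally start the RLC.
 */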
5817 static int si_rlc_resume(struct radeon_device *rdev)
5818 {
5819         u32 i;
5820
5821         if (!rdev->rlc_fw)
5822                 return -EINVAL;
5823
5824         si_rlc_stop(rdev);
5825
5826         si_rlc_reset(rdev);
5827
5828         si_init_pg(rdev);
5829
5830         si_init_cg(rdev);
5831
5832         WREG32(RLC_RL_BASE, 0);
5833         WREG32(RLC_RL_SIZE, 0);
5834         WREG32(RLC_LB_CNTL, 0);
5835         WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5836         WREG32(RLC_LB_CNTR_INIT, 0);
5837         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5838
5839         WREG32(RLC_MC_CNTL, 0);
5840         WREG32(RLC_UCODE_CNTL, 0);
5841
5842         if (rdev->new_fw) {
5843                 const struct rlc_firmware_header_v1_0 *hdr =
5844                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5845                 u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5846                 const __le32 *fw_data = (const __le32 *)
5847                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5848
5849                 radeon_ucode_print_rlc_hdr(&hdr->header);
5850
5851                 for (i = 0; i < fw_size; i++) {
5852                         WREG32(RLC_UCODE_ADDR, i);
5853                         WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
5854                 }
5855         } else {
5856                 const __be32 *fw_data =
5857                         (const __be32 *)rdev->rlc_fw->data;
5858                 for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5859                         WREG32(RLC_UCODE_ADDR, i);
5860                         WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5861                 }
5862         }
5863         WREG32(RLC_UCODE_ADDR, 0);
5864
5865         si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5866
5867         si_rlc_start(rdev);
5868
5869         return 0;
5870 }
5871
5872 static void si_enable_interrupts(struct radeon_device *rdev)
5873 {
5874         u32 ih_cntl = RREG32(IH_CNTL);
5875         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5876
5877         ih_cntl |= ENABLE_INTR;
5878         ih_rb_cntl |= IH_RB_ENABLE;
5879         WREG32(IH_CNTL, ih_cntl);
5880         WREG32(IH_RB_CNTL, ih_rb_cntl);
5881         rdev->ih.enabled = true;
5882 }
5883
5884 static void si_disable_interrupts(struct radeon_device *rdev)
5885 {
5886         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5887         u32 ih_cntl = RREG32(IH_CNTL);
5888
5889         ih_rb_cntl &= ~IH_RB_ENABLE;
5890         ih_cntl &= ~ENABLE_INTR;
5891         WREG32(IH_RB_CNTL, ih_rb_cntl);
5892         WREG32(IH_CNTL, ih_cntl);
5893         /* set rptr, wptr to 0 */
5894         WREG32(IH_RB_RPTR, 0);
5895         WREG32(IH_RB_WPTR, 0);
5896         rdev->ih.enabled = false;
5897         rdev->ih.rptr = 0;
5898 }
5899
5900 static void si_disable_interrupt_state(struct radeon_device *rdev)
5901 {
5902         u32 tmp;
5903
5904         tmp = RREG32(CP_INT_CNTL_RING0) &
5905                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5906         WREG32(CP_INT_CNTL_RING0, tmp);
5907         WREG32(CP_INT_CNTL_RING1, 0);
5908         WREG32(CP_INT_CNTL_RING2, 0);
5909         tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5910         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5911         tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5912         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5913         WREG32(GRBM_INT_CNTL, 0);
5914         if (rdev->num_crtc >= 2) {
5915                 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5916                 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5917         }
5918         if (rdev->num_crtc >= 4) {
5919                 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5920                 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5921         }
5922         if (rdev->num_crtc >= 6) {
5923                 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5924                 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5925         }
5926
5927         if (rdev->num_crtc >= 2) {
5928                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5929                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5930         }
5931         if (rdev->num_crtc >= 4) {
5932                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5933                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5934         }
5935         if (rdev->num_crtc >= 6) {
5936                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5937                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5938         }
5939
5940         if (!ASIC_IS_NODCE(rdev)) {
5941                 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5942
5943                 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5944                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5945                 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5946                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5947                 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5948                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5949                 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5950                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5951                 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5952                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5953                 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5954                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5955         }
5956 }
5957
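/* One time IH setup: allocate the IH ring, load the RLC, program the
 * ring base, size and writeback address, set the default IH_CNTL value
 * and then enable interrupt delivery.
 */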
5958 static int si_irq_init(struct radeon_device *rdev)
5959 {
5960         int ret = 0;
5961         int rb_bufsz;
5962         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5963
5964         /* allocate ring */
5965         ret = r600_ih_ring_alloc(rdev);
5966         if (ret)
5967                 return ret;
5968
5969         /* disable irqs */
5970         si_disable_interrupts(rdev);
5971
5972         /* init rlc */
5973         ret = si_rlc_resume(rdev);
5974         if (ret) {
5975                 r600_ih_ring_fini(rdev);
5976                 return ret;
5977         }
5978
5979         /* setup interrupt control */
5980         /* set dummy read address to ring address */
5981         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5982         interrupt_cntl = RREG32(INTERRUPT_CNTL);
5983         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5984          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5985          */
5986         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5987         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5988         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5989         WREG32(INTERRUPT_CNTL, interrupt_cntl);
5990
5991         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5992         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
5993
5994         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5995                       IH_WPTR_OVERFLOW_CLEAR |
5996                       (rb_bufsz << 1));
5997
5998         if (rdev->wb.enabled)
5999                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6000
6001         /* set the writeback address whether it's enabled or not */
6002         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6003         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6004
6005         WREG32(IH_RB_CNTL, ih_rb_cntl);
6006
6007         /* set rptr, wptr to 0 */
6008         WREG32(IH_RB_RPTR, 0);
6009         WREG32(IH_RB_WPTR, 0);
6010
6011         /* Default settings for IH_CNTL (disabled at first) */
6012         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6013         /* RPTR_REARM only works if msi's are enabled */
6014         if (rdev->msi_enabled)
6015                 ih_cntl |= RPTR_REARM;
6016         WREG32(IH_CNTL, ih_cntl);
6017
6018         /* force the active interrupt state to all disabled */
6019         si_disable_interrupt_state(rdev);
6020
6021         pci_set_master(rdev->pdev);
6022
6023         /* enable irqs */
6024         si_enable_interrupts(rdev);
6025
6026         return ret;
6027 }
6028
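/* Program the per-source interrupt enables (CP rings, DMA engines,
 * vblank, hotplug, thermal) from the state tracked in rdev->irq.
 * Returns 0 on success or -EINVAL if no handler has been installed.
 */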
6029 int si_irq_set(struct radeon_device *rdev)
6030 {
6031         u32 cp_int_cntl;
6032         u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
6033         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6034         u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
6035         u32 grbm_int_cntl = 0;
6036         u32 dma_cntl, dma_cntl1;
6037         u32 thermal_int = 0;
6038
6039         if (!rdev->irq.installed) {
6040                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6041                 return -EINVAL;
6042         }
6043         /* don't enable anything if the ih is disabled */
6044         if (!rdev->ih.enabled) {
6045                 si_disable_interrupts(rdev);
6046                 /* force the active interrupt state to all disabled */
6047                 si_disable_interrupt_state(rdev);
6048                 return 0;
6049         }
6050
6051         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6052                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6053
6054         if (!ASIC_IS_NODCE(rdev)) {
6055                 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6056                 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6057                 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6058                 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6059                 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6060                 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6061         }
6062
6063         dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6064         dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6065
6066         thermal_int = RREG32(CG_THERMAL_INT) &
6067                 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6068
6069         /* enable CP interrupts on all rings */
6070         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6071                 DRM_DEBUG("si_irq_set: sw int gfx\n");
6072                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6073         }
6074         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6075                 DRM_DEBUG("si_irq_set: sw int cp1\n");
6076                 cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
6077         }
6078         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6079                 DRM_DEBUG("si_irq_set: sw int cp2\n");
6080                 cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
6081         }
6082         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6083                 DRM_DEBUG("si_irq_set: sw int dma\n");
6084                 dma_cntl |= TRAP_ENABLE;
6085         }
6086
6087         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6088                 DRM_DEBUG("si_irq_set: sw int dma1\n");
6089                 dma_cntl1 |= TRAP_ENABLE;
6090         }
6091         if (rdev->irq.crtc_vblank_int[0] ||
6092             atomic_read(&rdev->irq.pflip[0])) {
6093                 DRM_DEBUG("si_irq_set: vblank 0\n");
6094                 crtc1 |= VBLANK_INT_MASK;
6095         }
6096         if (rdev->irq.crtc_vblank_int[1] ||
6097             atomic_read(&rdev->irq.pflip[1])) {
6098                 DRM_DEBUG("si_irq_set: vblank 1\n");
6099                 crtc2 |= VBLANK_INT_MASK;
6100         }
6101         if (rdev->irq.crtc_vblank_int[2] ||
6102             atomic_read(&rdev->irq.pflip[2])) {
6103                 DRM_DEBUG("si_irq_set: vblank 2\n");
6104                 crtc3 |= VBLANK_INT_MASK;
6105         }
6106         if (rdev->irq.crtc_vblank_int[3] ||
6107             atomic_read(&rdev->irq.pflip[3])) {
6108                 DRM_DEBUG("si_irq_set: vblank 3\n");
6109                 crtc4 |= VBLANK_INT_MASK;
6110         }
6111         if (rdev->irq.crtc_vblank_int[4] ||
6112             atomic_read(&rdev->irq.pflip[4])) {
6113                 DRM_DEBUG("si_irq_set: vblank 4\n");
6114                 crtc5 |= VBLANK_INT_MASK;
6115         }
6116         if (rdev->irq.crtc_vblank_int[5] ||
6117             atomic_read(&rdev->irq.pflip[5])) {
6118                 DRM_DEBUG("si_irq_set: vblank 5\n");
6119                 crtc6 |= VBLANK_INT_MASK;
6120         }
6121         if (rdev->irq.hpd[0]) {
6122                 DRM_DEBUG("si_irq_set: hpd 1\n");
6123                 hpd1 |= DC_HPDx_INT_EN;
6124         }
6125         if (rdev->irq.hpd[1]) {
6126                 DRM_DEBUG("si_irq_set: hpd 2\n");
6127                 hpd2 |= DC_HPDx_INT_EN;
6128         }
6129         if (rdev->irq.hpd[2]) {
6130                 DRM_DEBUG("si_irq_set: hpd 3\n");
6131                 hpd3 |= DC_HPDx_INT_EN;
6132         }
6133         if (rdev->irq.hpd[3]) {
6134                 DRM_DEBUG("si_irq_set: hpd 4\n");
6135                 hpd4 |= DC_HPDx_INT_EN;
6136         }
6137         if (rdev->irq.hpd[4]) {
6138                 DRM_DEBUG("si_irq_set: hpd 5\n");
6139                 hpd5 |= DC_HPDx_INT_EN;
6140         }
6141         if (rdev->irq.hpd[5]) {
6142                 DRM_DEBUG("si_irq_set: hpd 6\n");
6143                 hpd6 |= DC_HPDx_INT_EN;
6144         }
6145
6146         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6147         WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
6148         WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
6149
6150         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
6151         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
6152
6153         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6154
6155         if (rdev->irq.dpm_thermal) {
6156                 DRM_DEBUG("dpm thermal\n");
6157                 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6158         }
6159
6160         if (rdev->num_crtc >= 2) {
6161                 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6162                 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6163         }
6164         if (rdev->num_crtc >= 4) {
6165                 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6166                 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6167         }
6168         if (rdev->num_crtc >= 6) {
6169                 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6170                 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6171         }
6172
6173         if (rdev->num_crtc >= 2) {
6174                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
6175                        GRPH_PFLIP_INT_MASK);
6176                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
6177                        GRPH_PFLIP_INT_MASK);
6178         }
6179         if (rdev->num_crtc >= 4) {
6180                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
6181                        GRPH_PFLIP_INT_MASK);
6182                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
6183                        GRPH_PFLIP_INT_MASK);
6184         }
6185         if (rdev->num_crtc >= 6) {
6186                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
6187                        GRPH_PFLIP_INT_MASK);
6188                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
6189                        GRPH_PFLIP_INT_MASK);
6190         }
6191
6192         if (!ASIC_IS_NODCE(rdev)) {
6193                 WREG32(DC_HPD1_INT_CONTROL, hpd1);
6194                 WREG32(DC_HPD2_INT_CONTROL, hpd2);
6195                 WREG32(DC_HPD3_INT_CONTROL, hpd3);
6196                 WREG32(DC_HPD4_INT_CONTROL, hpd4);
6197                 WREG32(DC_HPD5_INT_CONTROL, hpd5);
6198                 WREG32(DC_HPD6_INT_CONTROL, hpd6);
6199         }
6200
6201         WREG32(CG_THERMAL_INT, thermal_int);
6202
6203         return 0;
6204 }
6205
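/* Latch the display interrupt status registers into
 * rdev->irq.stat_regs.evergreen and acknowledge any pending pageflip,
 * vblank, vline and hotplug interrupts.
 */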
6206 static inline void si_irq_ack(struct radeon_device *rdev)
6207 {
6208         u32 tmp;
6209
6210         if (ASIC_IS_NODCE(rdev))
6211                 return;
6212
6213         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6214         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6215         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6216         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6217         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6218         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6219         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
6220         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
6221         if (rdev->num_crtc >= 4) {
6222                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
6223                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
6224         }
6225         if (rdev->num_crtc >= 6) {
6226                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
6227                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
6228         }
6229
6230         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
6231                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6232         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
6233                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6234         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
6235                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6236         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
6237                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6238         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6239                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6240         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6241                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6242
6243         if (rdev->num_crtc >= 4) {
6244                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
6245                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6246                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
6247                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6248                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6249                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6250                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6251                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6252                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6253                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6254                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6255                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6256         }
6257
6258         if (rdev->num_crtc >= 6) {
6259                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6260                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6261                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6262                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6263                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6264                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6265                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6266                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6267                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6268                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6269                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6270                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6271         }
6272
6273         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6274                 tmp = RREG32(DC_HPD1_INT_CONTROL);
6275                 tmp |= DC_HPDx_INT_ACK;
6276                 WREG32(DC_HPD1_INT_CONTROL, tmp);
6277         }
6278         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6279                 tmp = RREG32(DC_HPD2_INT_CONTROL);
6280                 tmp |= DC_HPDx_INT_ACK;
6281                 WREG32(DC_HPD2_INT_CONTROL, tmp);
6282         }
6283         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6284                 tmp = RREG32(DC_HPD3_INT_CONTROL);
6285                 tmp |= DC_HPDx_INT_ACK;
6286                 WREG32(DC_HPD3_INT_CONTROL, tmp);
6287         }
6288         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6289                 tmp = RREG32(DC_HPD4_INT_CONTROL);
6290                 tmp |= DC_HPDx_INT_ACK;
6291                 WREG32(DC_HPD4_INT_CONTROL, tmp);
6292         }
6293         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6294                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6295                 tmp |= DC_HPDx_INT_ACK;
6296                 WREG32(DC_HPD5_INT_CONTROL, tmp);
6297         }
6298         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6299                 tmp = RREG32(DC_HPD6_INT_CONTROL);
6300                 tmp |= DC_HPDx_INT_ACK;
6301                 WREG32(DC_HPD6_INT_CONTROL, tmp);
6302         }
6303 }
6304
6305 static void si_irq_disable(struct radeon_device *rdev)
6306 {
6307         si_disable_interrupts(rdev);
6308         /* Wait and acknowledge irq */
6309         mdelay(1);
6310         si_irq_ack(rdev);
6311         si_disable_interrupt_state(rdev);
6312 }
6313
6314 static void si_irq_suspend(struct radeon_device *rdev)
6315 {
6316         si_irq_disable(rdev);
6317         si_rlc_stop(rdev);
6318 }
6319
6320 static void si_irq_fini(struct radeon_device *rdev)
6321 {
6322         si_irq_suspend(rdev);
6323         r600_ih_ring_fini(rdev);
6324 }
6325
6326 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6327 {
6328         u32 wptr, tmp;
6329
6330         if (rdev->wb.enabled)
6331                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6332         else
6333                 wptr = RREG32(IH_RB_WPTR);
6334
6335         if (wptr & RB_OVERFLOW) {
6336                 wptr &= ~RB_OVERFLOW;
6337                 /* When a ring buffer overflow happens, start parsing interrupts
6338                  * from the last vector that was not overwritten (wptr + 16).
6339                  * Hopefully this should allow us to catch up.
6340                  */
6341                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6342                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6343                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6344                 tmp = RREG32(IH_RB_CNTL);
6345                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6346                 WREG32(IH_RB_CNTL, tmp);
6347         }
6348         return (wptr & rdev->ih.ptr_mask);
6349 }
6350
6351 /* SI IV Ring
6352  * Each IV ring entry is 128 bits:
6353  * [7:0]    - interrupt source id
6354  * [31:8]   - reserved
6355  * [59:32]  - interrupt source data
6356  * [63:60]  - reserved
6357  * [71:64]  - RINGID
6358  * [79:72]  - VMID
6359  * [127:80] - reserved
6360  */
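/* How si_irq_process() decodes one IV entry from the (little endian)
 * IH ring; shown only to illustrate the layout documented above:
 *
 *     src_id   = le32_to_cpu(ring[i + 0]) & 0xff;
 *     src_data = le32_to_cpu(ring[i + 1]) & 0xfffffff;
 *     ring_id  = le32_to_cpu(ring[i + 2]) & 0xff;
 */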
6361 int si_irq_process(struct radeon_device *rdev)
6362 {
6363         u32 wptr;
6364         u32 rptr;
6365         u32 src_id, src_data, ring_id;
6366         u32 ring_index;
6367         bool queue_hotplug = false;
6368         bool queue_thermal = false;
6369         u32 status, addr;
6370
6371         if (!rdev->ih.enabled || rdev->shutdown)
6372                 return IRQ_NONE;
6373
6374         wptr = si_get_ih_wptr(rdev);
6375
6376 restart_ih:
6377         /* is somebody else already processing irqs? */
6378         if (atomic_xchg(&rdev->ih.lock, 1))
6379                 return IRQ_NONE;
6380
6381         rptr = rdev->ih.rptr;
6382         DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6383
6384         /* Order reading of wptr vs. reading of IH ring data */
6385         rmb();
6386
6387         /* display interrupts */
6388         si_irq_ack(rdev);
6389
6390         while (rptr != wptr) {
6391                 /* wptr/rptr are in bytes! */
6392                 ring_index = rptr / 4;
6393                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6394                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6395                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6396
6397                 switch (src_id) {
6398                 case 1: /* D1 vblank/vline */
6399                         switch (src_data) {
6400                         case 0: /* D1 vblank */
6401                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
6402                                         if (rdev->irq.crtc_vblank_int[0]) {
6403                                                 drm_handle_vblank(rdev->ddev, 0);
6404                                                 rdev->pm.vblank_sync = true;
6405                                                 wake_up(&rdev->irq.vblank_queue);
6406                                         }
6407                                         if (atomic_read(&rdev->irq.pflip[0]))
6408                                                 radeon_crtc_handle_vblank(rdev, 0);
6409                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6410                                         DRM_DEBUG("IH: D1 vblank\n");
6411                                 }
6412                                 break;
6413                         case 1: /* D1 vline */
6414                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
6415                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6416                                         DRM_DEBUG("IH: D1 vline\n");
6417                                 }
6418                                 break;
6419                         default:
6420                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6421                                 break;
6422                         }
6423                         break;
6424                 case 2: /* D2 vblank/vline */
6425                         switch (src_data) {
6426                         case 0: /* D2 vblank */
6427                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6428                                         if (rdev->irq.crtc_vblank_int[1]) {
6429                                                 drm_handle_vblank(rdev->ddev, 1);
6430                                                 rdev->pm.vblank_sync = true;
6431                                                 wake_up(&rdev->irq.vblank_queue);
6432                                         }
6433                                         if (atomic_read(&rdev->irq.pflip[1]))
6434                                                 radeon_crtc_handle_vblank(rdev, 1);
6435                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6436                                         DRM_DEBUG("IH: D2 vblank\n");
6437                                 }
6438                                 break;
6439                         case 1: /* D2 vline */
6440                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6441                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6442                                         DRM_DEBUG("IH: D2 vline\n");
6443                                 }
6444                                 break;
6445                         default:
6446                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6447                                 break;
6448                         }
6449                         break;
6450                 case 3: /* D3 vblank/vline */
6451                         switch (src_data) {
6452                         case 0: /* D3 vblank */
6453                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6454                                         if (rdev->irq.crtc_vblank_int[2]) {
6455                                                 drm_handle_vblank(rdev->ddev, 2);
6456                                                 rdev->pm.vblank_sync = true;
6457                                                 wake_up(&rdev->irq.vblank_queue);
6458                                         }
6459                                         if (atomic_read(&rdev->irq.pflip[2]))
6460                                                 radeon_crtc_handle_vblank(rdev, 2);
6461                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6462                                         DRM_DEBUG("IH: D3 vblank\n");
6463                                 }
6464                                 break;
6465                         case 1: /* D3 vline */
6466                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6467                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6468                                         DRM_DEBUG("IH: D3 vline\n");
6469                                 }
6470                                 break;
6471                         default:
6472                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6473                                 break;
6474                         }
6475                         break;
6476                 case 4: /* D4 vblank/vline */
6477                         switch (src_data) {
6478                         case 0: /* D4 vblank */
6479                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6480                                         if (rdev->irq.crtc_vblank_int[3]) {
6481                                                 drm_handle_vblank(rdev->ddev, 3);
6482                                                 rdev->pm.vblank_sync = true;
6483                                                 wake_up(&rdev->irq.vblank_queue);
6484                                         }
6485                                         if (atomic_read(&rdev->irq.pflip[3]))
6486                                                 radeon_crtc_handle_vblank(rdev, 3);
6487                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6488                                         DRM_DEBUG("IH: D4 vblank\n");
6489                                 }
6490                                 break;
6491                         case 1: /* D4 vline */
6492                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6493                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6494                                         DRM_DEBUG("IH: D4 vline\n");
6495                                 }
6496                                 break;
6497                         default:
6498                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6499                                 break;
6500                         }
6501                         break;
6502                 case 5: /* D5 vblank/vline */
6503                         switch (src_data) {
6504                         case 0: /* D5 vblank */
6505                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6506                                         if (rdev->irq.crtc_vblank_int[4]) {
6507                                                 drm_handle_vblank(rdev->ddev, 4);
6508                                                 rdev->pm.vblank_sync = true;
6509                                                 wake_up(&rdev->irq.vblank_queue);
6510                                         }
6511                                         if (atomic_read(&rdev->irq.pflip[4]))
6512                                                 radeon_crtc_handle_vblank(rdev, 4);
6513                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6514                                         DRM_DEBUG("IH: D5 vblank\n");
6515                                 }
6516                                 break;
6517                         case 1: /* D5 vline */
6518                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6519                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6520                                         DRM_DEBUG("IH: D5 vline\n");
6521                                 }
6522                                 break;
6523                         default:
6524                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6525                                 break;
6526                         }
6527                         break;
6528                 case 6: /* D6 vblank/vline */
6529                         switch (src_data) {
6530                         case 0: /* D6 vblank */
6531                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6532                                         if (rdev->irq.crtc_vblank_int[5]) {
6533                                                 drm_handle_vblank(rdev->ddev, 5);
6534                                                 rdev->pm.vblank_sync = true;
6535                                                 wake_up(&rdev->irq.vblank_queue);
6536                                         }
6537                                         if (atomic_read(&rdev->irq.pflip[5]))
6538                                                 radeon_crtc_handle_vblank(rdev, 5);
6539                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6540                                         DRM_DEBUG("IH: D6 vblank\n");
6541                                 }
6542                                 break;
6543                         case 1: /* D6 vline */
6544                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6545                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6546                                         DRM_DEBUG("IH: D6 vline\n");
6547                                 }
6548                                 break;
6549                         default:
6550                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6551                                 break;
6552                         }
6553                         break;
6554                 case 8: /* D1 page flip */
6555                 case 10: /* D2 page flip */
6556                 case 12: /* D3 page flip */
6557                 case 14: /* D4 page flip */
6558                 case 16: /* D5 page flip */
6559                 case 18: /* D6 page flip */
6560                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6561                         if (radeon_use_pflipirq > 0)
6562                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6563                         break;
6564                 case 42: /* HPD hotplug */
6565                         switch (src_data) {
6566                         case 0:
6567                                 if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6568                                         rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6569                                         queue_hotplug = true;
6570                                         DRM_DEBUG("IH: HPD1\n");
6571                                 }
6572                                 break;
6573                         case 1:
6574                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6575                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6576                                         queue_hotplug = true;
6577                                         DRM_DEBUG("IH: HPD2\n");
6578                                 }
6579                                 break;
6580                         case 2:
6581                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6582                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6583                                         queue_hotplug = true;
6584                                         DRM_DEBUG("IH: HPD3\n");
6585                                 }
6586                                 break;
6587                         case 3:
6588                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6589                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6590                                         queue_hotplug = true;
6591                                         DRM_DEBUG("IH: HPD4\n");
6592                                 }
6593                                 break;
6594                         case 4:
6595                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6596                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6597                                         queue_hotplug = true;
6598                                         DRM_DEBUG("IH: HPD5\n");
6599                                 }
6600                                 break;
6601                         case 5:
6602                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6603                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6604                                         queue_hotplug = true;
6605                                         DRM_DEBUG("IH: HPD6\n");
6606                                 }
6607                                 break;
6608                         default:
6609                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6610                                 break;
6611                         }
6612                         break;
6613                 case 124: /* UVD */
6614                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6615                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6616                         break;
6617                 case 146:
6618                 case 147:
6619                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6620                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6621                         /* reset addr and status */
6622                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6623                         if (addr == 0x0 && status == 0x0)
6624                                 break;
6625                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6626                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6627                                 addr);
6628                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6629                                 status);
6630                         si_vm_decode_fault(rdev, status, addr);
6631                         break;
6632                 case 176: /* RINGID0 CP_INT */
6633                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6634                         break;
6635                 case 177: /* RINGID1 CP_INT */
6636                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6637                         break;
6638                 case 178: /* RINGID2 CP_INT */
6639                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6640                         break;
6641                 case 181: /* CP EOP event */
6642                         DRM_DEBUG("IH: CP EOP\n");
6643                         switch (ring_id) {
6644                         case 0:
6645                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6646                                 break;
6647                         case 1:
6648                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6649                                 break;
6650                         case 2:
6651                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6652                                 break;
6653                         }
6654                         break;
6655                 case 224: /* DMA trap event */
6656                         DRM_DEBUG("IH: DMA trap\n");
6657                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6658                         break;
6659                 case 230: /* thermal low to high */
6660                         DRM_DEBUG("IH: thermal low to high\n");
6661                         rdev->pm.dpm.thermal.high_to_low = false;
6662                         queue_thermal = true;
6663                         break;
6664                 case 231: /* thermal high to low */
6665                         DRM_DEBUG("IH: thermal high to low\n");
6666                         rdev->pm.dpm.thermal.high_to_low = true;
6667                         queue_thermal = true;
6668                         break;
6669                 case 233: /* GUI IDLE */
6670                         DRM_DEBUG("IH: GUI idle\n");
6671                         break;
6672                 case 244: /* DMA1 trap event */
6673                         DRM_DEBUG("IH: DMA1 trap\n");
6674                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6675                         break;
6676                 default:
6677                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6678                         break;
6679                 }
6680
6681                 /* wptr/rptr are in bytes! */
6682                 rptr += 16;
6683                 rptr &= rdev->ih.ptr_mask;
6684                 WREG32(IH_RB_RPTR, rptr);
6685         }
6686         if (queue_hotplug)
6687                 schedule_work(&rdev->hotplug_work);
6688         if (queue_thermal && rdev->pm.dpm_enabled)
6689                 schedule_work(&rdev->pm.dpm.thermal.work);
6690         rdev->ih.rptr = rptr;
6691         atomic_set(&rdev->ih.lock, 0);
6692
6693         /* make sure wptr hasn't changed while processing */
6694         wptr = si_get_ih_wptr(rdev);
6695         if (wptr != rptr)
6696                 goto restart_ih;
6697
6698         return IRQ_HANDLED;
6699 }
6700
6701 /*
6702  * startup/shutdown callbacks
6703  */
6704 static int si_startup(struct radeon_device *rdev)
6705 {
6706         struct radeon_ring *ring;
6707         int r;
6708
6709         /* enable pcie gen2/3 link */
6710         si_pcie_gen3_enable(rdev);
6711         /* enable aspm */
6712         si_program_aspm(rdev);
6713
6714         /* scratch needs to be initialized before MC */
6715         r = r600_vram_scratch_init(rdev);
6716         if (r)
6717                 return r;
6718
6719         si_mc_program(rdev);
6720
6721         if (!rdev->pm.dpm_enabled) {
6722                 r = si_mc_load_microcode(rdev);
6723                 if (r) {
6724                         DRM_ERROR("Failed to load MC firmware!\n");
6725                         return r;
6726                 }
6727         }
6728
6729         r = si_pcie_gart_enable(rdev);
6730         if (r)
6731                 return r;
6732         si_gpu_init(rdev);
6733
6734         /* allocate rlc buffers */
6735         if (rdev->family == CHIP_VERDE) {
6736                 rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
6737                 rdev->rlc.reg_list_size =
6738                         (u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
6739         }
6740         rdev->rlc.cs_data = si_cs_data;
6741         r = sumo_rlc_init(rdev);
6742         if (r) {
6743                 DRM_ERROR("Failed to init rlc BOs!\n");
6744                 return r;
6745         }
6746
6747         /* allocate wb buffer */
6748         r = radeon_wb_init(rdev);
6749         if (r)
6750                 return r;
6751
6752         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6753         if (r) {
6754                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6755                 return r;
6756         }
6757
6758         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6759         if (r) {
6760                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6761                 return r;
6762         }
6763
6764         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6765         if (r) {
6766                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6767                 return r;
6768         }
6769
6770         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6771         if (r) {
6772                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6773                 return r;
6774         }
6775
6776         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6777         if (r) {
6778                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6779                 return r;
6780         }
6781
6782         if (rdev->has_uvd) {
6783                 r = uvd_v2_2_resume(rdev);
6784                 if (!r) {
6785                         r = radeon_fence_driver_start_ring(rdev,
6786                                                            R600_RING_TYPE_UVD_INDEX);
6787                         if (r)
6788                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6789                 }
6790                 if (r)
6791                         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6792         }
6793
6794         /* Enable IRQ */
6795         if (!rdev->irq.installed) {
6796                 r = radeon_irq_kms_init(rdev);
6797                 if (r)
6798                         return r;
6799         }
6800
6801         r = si_irq_init(rdev);
6802         if (r) {
6803                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6804                 radeon_irq_kms_fini(rdev);
6805                 return r;
6806         }
6807         si_irq_set(rdev);
6808
6809         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6810         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6811                              RADEON_CP_PACKET2);
6812         if (r)
6813                 return r;
6814
6815         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6816         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6817                              RADEON_CP_PACKET2);
6818         if (r)
6819                 return r;
6820
6821         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6822         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6823                              RADEON_CP_PACKET2);
6824         if (r)
6825                 return r;
6826
6827         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6828         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6829                              DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6830         if (r)
6831                 return r;
6832
6833         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6834         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6835                              DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6836         if (r)
6837                 return r;
6838
6839         r = si_cp_load_microcode(rdev);
6840         if (r)
6841                 return r;
6842         r = si_cp_resume(rdev);
6843         if (r)
6844                 return r;
6845
6846         r = cayman_dma_resume(rdev);
6847         if (r)
6848                 return r;
6849
6850         if (rdev->has_uvd) {
6851                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6852                 if (ring->ring_size) {
6853                         r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
6854                                              RADEON_CP_PACKET2);
6855                         if (!r)
6856                                 r = uvd_v1_0_init(rdev);
6857                         if (r)
6858                                 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6859                 }
6860         }
6861
6862         r = radeon_ib_pool_init(rdev);
6863         if (r) {
6864                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6865                 return r;
6866         }
6867
6868         r = radeon_vm_manager_init(rdev);
6869         if (r) {
6870                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6871                 return r;
6872         }
6873
6874         r = radeon_audio_init(rdev);
6875         if (r)
6876                 return r;
6877
6878         return 0;
6879 }
6880
6881 int si_resume(struct radeon_device *rdev)
6882 {
6883         int r;
6884
6885         /* Do not reset the GPU before posting; on rv770 hw, unlike on r500
6886          * hw, posting will perform the necessary tasks to bring the GPU
6887          * back into good shape.
6888          */
6889         /* post card */
6890         atom_asic_init(rdev->mode_info.atom_context);
6891
6892         /* init golden registers */
6893         si_init_golden_registers(rdev);
6894
6895         if (rdev->pm.pm_method == PM_METHOD_DPM)
6896                 radeon_pm_resume(rdev);
6897
6898         rdev->accel_working = true;
6899         r = si_startup(rdev);
6900         if (r) {
6901                 DRM_ERROR("si startup failed on resume\n");
6902                 rdev->accel_working = false;
6903                 return r;
6904         }
6905
6906         return r;
6907
6908 }
6909
6910 int si_suspend(struct radeon_device *rdev)
6911 {
6912         radeon_pm_suspend(rdev);
6913         radeon_audio_fini(rdev);
6914         radeon_vm_manager_fini(rdev);
6915         si_cp_enable(rdev, false);
6916         cayman_dma_stop(rdev);
6917         if (rdev->has_uvd) {
6918                 uvd_v1_0_fini(rdev);
6919                 radeon_uvd_suspend(rdev);
6920         }
6921         si_fini_pg(rdev);
6922         si_fini_cg(rdev);
6923         si_irq_suspend(rdev);
6924         radeon_wb_disable(rdev);
6925         si_pcie_gart_disable(rdev);
6926         return 0;
6927 }
6928
6929 /* The plan is to move initialization into this function and use
6930  * helper functions so that radeon_device_init does pretty much
6931  * nothing more than call asic-specific functions. This should
6932  * also allow us to remove a bunch of callback functions like
6933  * vram_info.
6934  */
6935 int si_init(struct radeon_device *rdev)
6936 {
6937         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6938         int r;
6939
6940         /* Read BIOS */
6941         if (!radeon_get_bios(rdev)) {
6942                 if (ASIC_IS_AVIVO(rdev))
6943                         return -EINVAL;
6944         }
6945         /* Must be an ATOMBIOS */
6946         if (!rdev->is_atom_bios) {
6947                 dev_err(rdev->dev, "Expecting atombios for SI GPU\n");
6948                 return -EINVAL;
6949         }
6950         r = radeon_atombios_init(rdev);
6951         if (r)
6952                 return r;
6953
6954         /* Post card if necessary */
6955         if (!radeon_card_posted(rdev)) {
6956                 if (!rdev->bios) {
6957                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6958                         return -EINVAL;
6959                 }
6960                 DRM_INFO("GPU not posted. posting now...\n");
6961                 atom_asic_init(rdev->mode_info.atom_context);
6962         }
6963         /* init golden registers */
6964         si_init_golden_registers(rdev);
6965         /* Initialize scratch registers */
6966         si_scratch_init(rdev);
6967         /* Initialize surface registers */
6968         radeon_surface_init(rdev);
6969         /* Initialize clocks */
6970         radeon_get_clock_info(rdev->ddev);
6971
6972         /* Fence driver */
6973         r = radeon_fence_driver_init(rdev);
6974         if (r)
6975                 return r;
6976
6977         /* initialize memory controller */
6978         r = si_mc_init(rdev);
6979         if (r)
6980                 return r;
6981         /* Memory manager */
6982         r = radeon_bo_init(rdev);
6983         if (r)
6984                 return r;
6985
6986         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6987             !rdev->rlc_fw || !rdev->mc_fw) {
6988                 r = si_init_microcode(rdev);
6989                 if (r) {
6990                         DRM_ERROR("Failed to load firmware!\n");
6991                         return r;
6992                 }
6993         }
6994
6995         /* Initialize power management */
6996         radeon_pm_init(rdev);
6997
6998         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6999         ring->ring_obj = NULL;
7000         r600_ring_init(rdev, ring, 1024 * 1024);
7001
7002         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7003         ring->ring_obj = NULL;
7004         r600_ring_init(rdev, ring, 1024 * 1024);
7005
7006         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7007         ring->ring_obj = NULL;
7008         r600_ring_init(rdev, ring, 1024 * 1024);
7009
7010         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7011         ring->ring_obj = NULL;
7012         r600_ring_init(rdev, ring, 64 * 1024);
7013
7014         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7015         ring->ring_obj = NULL;
7016         r600_ring_init(rdev, ring, 64 * 1024);
7017
7018         if (rdev->has_uvd) {
7019                 r = radeon_uvd_init(rdev);
7020                 if (!r) {
7021                         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7022                         ring->ring_obj = NULL;
7023                         r600_ring_init(rdev, ring, 4096);
7024                 }
7025         }
7026
7027         rdev->ih.ring_obj = NULL;
7028         r600_ih_ring_init(rdev, 64 * 1024);
7029
7030         r = r600_pcie_gart_init(rdev);
7031         if (r)
7032                 return r;
7033
7034         rdev->accel_working = true;
7035         r = si_startup(rdev);
7036         if (r) {
7037                 dev_err(rdev->dev, "disabling GPU acceleration\n");
7038                 si_cp_fini(rdev);
7039                 cayman_dma_fini(rdev);
7040                 si_irq_fini(rdev);
7041                 sumo_rlc_fini(rdev);
7042                 radeon_wb_fini(rdev);
7043                 radeon_ib_pool_fini(rdev);
7044                 radeon_vm_manager_fini(rdev);
7045                 radeon_irq_kms_fini(rdev);
7046                 si_pcie_gart_fini(rdev);
7047                 rdev->accel_working = false;
7048         }
7049
7050         /* Don't start up if the MC ucode is missing.
7051          * The default clocks and voltages before the MC ucode
7052          * is loaded are not sufficient for advanced operations.
7053          */
7054         if (!rdev->mc_fw) {
7055                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
7056                 return -EINVAL;
7057         }
7058
7059         return 0;
7060 }
7061
7062 void si_fini(struct radeon_device *rdev)
7063 {
7064         radeon_pm_fini(rdev);
7065         si_cp_fini(rdev);
7066         cayman_dma_fini(rdev);
7067         si_fini_pg(rdev);
7068         si_fini_cg(rdev);
7069         si_irq_fini(rdev);
7070         sumo_rlc_fini(rdev);
7071         radeon_wb_fini(rdev);
7072         radeon_vm_manager_fini(rdev);
7073         radeon_ib_pool_fini(rdev);
7074         radeon_irq_kms_fini(rdev);
7075         if (rdev->has_uvd) {
7076                 uvd_v1_0_fini(rdev);
7077                 radeon_uvd_fini(rdev);
7078         }
7079         si_pcie_gart_fini(rdev);
7080         r600_vram_scratch_fini(rdev);
7081         radeon_gem_fini(rdev);
7082         radeon_fence_driver_fini(rdev);
7083         radeon_bo_fini(rdev);
7084         radeon_atombios_fini(rdev);
7085         kfree(rdev->bios);
7086         rdev->bios = NULL;
7087 }
7088
7089 /**
7090  * si_get_gpu_clock_counter - return GPU clock counter snapshot
7091  *
7092  * @rdev: radeon_device pointer
7093  *
7094  * Fetches a GPU clock counter snapshot (SI).
7095  * Returns the 64 bit clock counter snapshot.
7096  */
7097 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
7098 {
7099         uint64_t clock;
7100
7101         mutex_lock(&rdev->gpu_clock_mutex);
7102         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7103         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7104                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7105         mutex_unlock(&rdev->gpu_clock_mutex);
7106         return clock;
7107 }
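
/*
 * Editor's illustrative sketch (not part of the driver): using the snapshot
 * helper above to time an interval in GPU clocks.  The name
 * si_gpu_clock_delta is hypothetical; unsigned 64-bit subtraction handles
 * counter wrap-around naturally.
 */
static inline uint64_t si_gpu_clock_delta(struct radeon_device *rdev,
					  uint64_t start)
{
	return si_get_gpu_clock_counter(rdev) - start;
}
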
7108
7109 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
7110 {
7111         unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
7112         int r;
7113
7114         /* bypass vclk and dclk with bclk */
7115         WREG32_P(CG_UPLL_FUNC_CNTL_2,
7116                 VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
7117                 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7118
7119         /* put PLL in bypass mode */
7120         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
7121
7122         if (!vclk || !dclk) {
7123                 /* keep the Bypass mode, put PLL to sleep */
7124                 WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
7125                 return 0;
7126         }
7127
7128         r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
7129                                           16384, 0x03FFFFFF, 0, 128, 5,
7130                                           &fb_div, &vclk_div, &dclk_div);
7131         if (r)
7132                 return r;
7133
7134         /* set RESET_ANTI_MUX to 0 */
7135         WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7136
7137         /* set VCO_MODE to 1 */
7138         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
7139
7140         /* toggle UPLL_SLEEP to 1 then back to 0 */
7141         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
7142         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
7143
7144         /* deassert UPLL_RESET */
7145         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7146
7147         mdelay(1);
7148
7149         r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7150         if (r)
7151                 return r;
7152
7153         /* assert UPLL_RESET again */
7154         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
7155
7156         /* disable spread spectrum. */
7157         WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7158
7159         /* set feedback divider */
7160         WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
7161
7162         /* set ref divider to 0 */
7163         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
7164
7165         if (fb_div < 307200)
7166                 WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
7167         else
7168                 WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
7169
7170         /* set PDIV_A and PDIV_B */
7171         WREG32_P(CG_UPLL_FUNC_CNTL_2,
7172                 UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
7173                 ~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
7174
7175         /* give the PLL some time to settle */
7176         mdelay(15);
7177
7178         /* deassert PLL_RESET */
7179         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7180
7181         mdelay(15);
7182
7183         /* switch from bypass mode to normal mode */
7184         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
7185
7186         r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7187         if (r)
7188                 return r;
7189
7190         /* switch VCLK and DCLK selection */
7191         WREG32_P(CG_UPLL_FUNC_CNTL_2,
7192                 VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
7193                 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7194
7195         mdelay(100);
7196
7197         return 0;
7198 }
7199
7200 static void si_pcie_gen3_enable(struct radeon_device *rdev)
7201 {
7202         struct pci_dev *root = rdev->pdev->bus->self;
7203         int bridge_pos, gpu_pos;
7204         u32 speed_cntl, mask, current_data_rate;
7205         int ret, i;
7206         u16 tmp16;
7207
7208         if (pci_is_root_bus(rdev->pdev->bus))
7209                 return;
7210
7211         if (radeon_pcie_gen2 == 0)
7212                 return;
7213
7214         if (rdev->flags & RADEON_IS_IGP)
7215                 return;
7216
7217         if (!(rdev->flags & RADEON_IS_PCIE))
7218                 return;
7219
7220         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
7221         if (ret != 0)
7222                 return;
7223
7224         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
7225                 return;
7226
7227         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7228         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7229                 LC_CURRENT_DATA_RATE_SHIFT;
7230         if (mask & DRM_PCIE_SPEED_80) {
7231                 if (current_data_rate == 2) {
7232                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7233                         return;
7234                 }
7235                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7236         } else if (mask & DRM_PCIE_SPEED_50) {
7237                 if (current_data_rate == 1) {
7238                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7239                         return;
7240                 }
7241                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7242         }
7243
7244         bridge_pos = pci_pcie_cap(root);
7245         if (!bridge_pos)
7246                 return;
7247
7248         gpu_pos = pci_pcie_cap(rdev->pdev);
7249         if (!gpu_pos)
7250                 return;
7251
7252         if (mask & DRM_PCIE_SPEED_80) {
7253                 /* re-try equalization if gen3 is not already enabled */
7254                 if (current_data_rate != 2) {
7255                         u16 bridge_cfg, gpu_cfg;
7256                         u16 bridge_cfg2, gpu_cfg2;
7257                         u32 max_lw, current_lw, tmp;
7258
7259                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7260                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7261
7262                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7263                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7264
7265                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7266                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7267
7268                         tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7269                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7270                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7271
7272                         if (current_lw < max_lw) {
7273                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7274                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
7275                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7276                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7277                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7278                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7279                                 }
7280                         }
7281
7282                         for (i = 0; i < 10; i++) {
7283                                 /* check status */
7284                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
7285                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7286                                         break;
7287
7288                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7289                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7290
7291                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
7292                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
7293
7294                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7295                                 tmp |= LC_SET_QUIESCE;
7296                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7297
7298                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7299                                 tmp |= LC_REDO_EQ;
7300                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7301
7302                                 mdelay(100);
7303
7304                                 /* linkctl */
7305                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
7306                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7307                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7308                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7309
7310                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7311                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7312                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7313                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7314
7315                                 /* linkctl2 */
7316                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7317                                 tmp16 &= ~((1 << 4) | (7 << 9));
7318                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
7319                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
7320
7321                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7322                                 tmp16 &= ~((1 << 4) | (7 << 9));
7323                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
7324                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7325
7326                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7327                                 tmp &= ~LC_SET_QUIESCE;
7328                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7329                         }
7330                 }
7331         }
7332
7333         /* set the link speed */
7334         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7335         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7336         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7337
7338         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7339         tmp16 &= ~0xf;
7340         if (mask & DRM_PCIE_SPEED_80)
7341                 tmp16 |= 3; /* gen3 */
7342         else if (mask & DRM_PCIE_SPEED_50)
7343                 tmp16 |= 2; /* gen2 */
7344         else
7345                 tmp16 |= 1; /* gen1 */
7346         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7347
7348         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7349         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7350         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7351
7352         for (i = 0; i < rdev->usec_timeout; i++) {
7353                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7354                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7355                         break;
7356                 udelay(1);
7357         }
7358 }
7359
7360 static void si_program_aspm(struct radeon_device *rdev)
7361 {
7362         u32 data, orig;
7363         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7364         bool disable_clkreq = false;
7365
7366         if (radeon_aspm == 0)
7367                 return;
7368
7369         if (!(rdev->flags & RADEON_IS_PCIE))
7370                 return;
7371
7372         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7373         data &= ~LC_XMIT_N_FTS_MASK;
7374         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7375         if (orig != data)
7376                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7377
7378         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7379         data |= LC_GO_TO_RECOVERY;
7380         if (orig != data)
7381                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7382
7383         orig = data = RREG32_PCIE(PCIE_P_CNTL);
7384         data |= P_IGNORE_EDB_ERR;
7385         if (orig != data)
7386                 WREG32_PCIE(PCIE_P_CNTL, data);
7387
7388         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7389         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7390         data |= LC_PMI_TO_L1_DIS;
7391         if (!disable_l0s)
7392                 data |= LC_L0S_INACTIVITY(7);
7393
7394         if (!disable_l1) {
7395                 data |= LC_L1_INACTIVITY(7);
7396                 data &= ~LC_PMI_TO_L1_DIS;
7397                 if (orig != data)
7398                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7399
7400                 if (!disable_plloff_in_l1) {
7401                         bool clk_req_support;
7402
7403                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7404                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7405                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7406                         if (orig != data)
7407                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7408
7409                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7410                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7411                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7412                         if (orig != data)
7413                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7414
7415                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7416                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7417                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7418                         if (orig != data)
7419                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7420
7421                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7422                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7423                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7424                         if (orig != data)
7425                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7426
7427                         if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7428                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7429                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7430                                 if (orig != data)
7431                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7432
7433                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7434                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7435                                 if (orig != data)
7436                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7437
7438                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7439                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7440                                 if (orig != data)
7441                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7442
7443                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7444                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7445                                 if (orig != data)
7446                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7447
7448                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7449                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7450                                 if (orig != data)
7451                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7452
7453                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7454                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7455                                 if (orig != data)
7456                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7457
7458                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7459                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7460                                 if (orig != data)
7461                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7462
7463                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7464                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7465                                 if (orig != data)
7466                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7467                         }
7468                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7469                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7470                         data |= LC_DYN_LANES_PWR_STATE(3);
7471                         if (orig != data)
7472                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7473
7474                         orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7475                         data &= ~LS2_EXIT_TIME_MASK;
7476                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7477                                 data |= LS2_EXIT_TIME(5);
7478                         if (orig != data)
7479                                 WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7480
7481                         orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7482                         data &= ~LS2_EXIT_TIME_MASK;
7483                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7484                                 data |= LS2_EXIT_TIME(5);
7485                         if (orig != data)
7486                                 WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7487
7488                         if (!disable_clkreq &&
7489                             !pci_is_root_bus(rdev->pdev->bus)) {
7490                                 struct pci_dev *root = rdev->pdev->bus->self;
7491                                 u32 lnkcap;
7492
7493                                 clk_req_support = false;
7494                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7495                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7496                                         clk_req_support = true;
7497                         } else {
7498                                 clk_req_support = false;
7499                         }
7500
7501                         if (clk_req_support) {
7502                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7503                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7504                                 if (orig != data)
7505                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7506
7507                                 orig = data = RREG32(THM_CLK_CNTL);
7508                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7509                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7510                                 if (orig != data)
7511                                         WREG32(THM_CLK_CNTL, data);
7512
7513                                 orig = data = RREG32(MISC_CLK_CNTL);
7514                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7515                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7516                                 if (orig != data)
7517                                         WREG32(MISC_CLK_CNTL, data);
7518
7519                                 orig = data = RREG32(CG_CLKPIN_CNTL);
7520                                 data &= ~BCLK_AS_XCLK;
7521                                 if (orig != data)
7522                                         WREG32(CG_CLKPIN_CNTL, data);
7523
7524                                 orig = data = RREG32(CG_CLKPIN_CNTL_2);
7525                                 data &= ~FORCE_BIF_REFCLK_EN;
7526                                 if (orig != data)
7527                                         WREG32(CG_CLKPIN_CNTL_2, data);
7528
7529                                 orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7530                                 data &= ~MPLL_CLKOUT_SEL_MASK;
7531                                 data |= MPLL_CLKOUT_SEL(4);
7532                                 if (orig != data)
7533                                         WREG32(MPLL_BYPASSCLK_SEL, data);
7534
7535                                 orig = data = RREG32(SPLL_CNTL_MODE);
7536                                 data &= ~SPLL_REFCLK_SEL_MASK;
7537                                 if (orig != data)
7538                                         WREG32(SPLL_CNTL_MODE, data);
7539                         }
7540                 }
7541         } else {
7542                 if (orig != data)
7543                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7544         }
7545
7546         orig = data = RREG32_PCIE(PCIE_CNTL2);
7547         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7548         if (orig != data)
7549                 WREG32_PCIE(PCIE_CNTL2, data);
7550
7551         if (!disable_l0s) {
7552                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7553                 if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7554                         data = RREG32_PCIE(PCIE_LC_STATUS1);
7555                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7556                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7557                                 data &= ~LC_L0S_INACTIVITY_MASK;
7558                                 if (orig != data)
7559                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7560                         }
7561                 }
7562         }
7563 }