drivers/gpu/drm/radeon/si.c
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include <drm/radeon_drm.h>
32 #include "sid.h"
33 #include "atom.h"
34 #include "si_blit_shaders.h"
35 #include "clearstate_si.h"
36 #include "radeon_ucode.h"
37
38
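/*
 * Note on the firmware lists below: each image is named twice.  The
 * uppercase names are the legacy SI firmware files; the lowercase names
 * are the newer packaging of the same microcode.  The driver (see
 * si_init_microcode() later in this file) requests the lowercase files
 * first and falls back to the legacy uppercase names if they are absent.
 */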
39 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
40 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
41 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
42 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
43 MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
44 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
45 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
46
47 MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
48 MODULE_FIRMWARE("radeon/tahiti_me.bin");
49 MODULE_FIRMWARE("radeon/tahiti_ce.bin");
50 MODULE_FIRMWARE("radeon/tahiti_mc.bin");
51 MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
52 MODULE_FIRMWARE("radeon/tahiti_smc.bin");
53
54 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
55 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
56 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
57 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
58 MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
59 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
60 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
61
62 MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
63 MODULE_FIRMWARE("radeon/pitcairn_me.bin");
64 MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
65 MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
66 MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
67 MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
68
69 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
70 MODULE_FIRMWARE("radeon/VERDE_me.bin");
71 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
72 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
73 MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
74 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
75 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
76
77 MODULE_FIRMWARE("radeon/verde_pfp.bin");
78 MODULE_FIRMWARE("radeon/verde_me.bin");
79 MODULE_FIRMWARE("radeon/verde_ce.bin");
80 MODULE_FIRMWARE("radeon/verde_mc.bin");
81 MODULE_FIRMWARE("radeon/verde_rlc.bin");
82 MODULE_FIRMWARE("radeon/verde_smc.bin");
83
84 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
85 MODULE_FIRMWARE("radeon/OLAND_me.bin");
86 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
87 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
88 MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
89 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
90 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
91
92 MODULE_FIRMWARE("radeon/oland_pfp.bin");
93 MODULE_FIRMWARE("radeon/oland_me.bin");
94 MODULE_FIRMWARE("radeon/oland_ce.bin");
95 MODULE_FIRMWARE("radeon/oland_mc.bin");
96 MODULE_FIRMWARE("radeon/oland_rlc.bin");
97 MODULE_FIRMWARE("radeon/oland_smc.bin");
98
99 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
100 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
101 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
102 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
103 MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
104 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
105 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
106
107 MODULE_FIRMWARE("radeon/hainan_pfp.bin");
108 MODULE_FIRMWARE("radeon/hainan_me.bin");
109 MODULE_FIRMWARE("radeon/hainan_ce.bin");
110 MODULE_FIRMWARE("radeon/hainan_mc.bin");
111 MODULE_FIRMWARE("radeon/hainan_rlc.bin");
112 MODULE_FIRMWARE("radeon/hainan_smc.bin");
113
114 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
115 static void si_pcie_gen3_enable(struct radeon_device *rdev);
116 static void si_program_aspm(struct radeon_device *rdev);
117 extern void sumo_rlc_fini(struct radeon_device *rdev);
118 extern int sumo_rlc_init(struct radeon_device *rdev);
119 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
120 extern void r600_ih_ring_fini(struct radeon_device *rdev);
121 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
122 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
123 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
124 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
125 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
126 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
127 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
128                                          bool enable);
129 static void si_init_pg(struct radeon_device *rdev);
130 static void si_init_cg(struct radeon_device *rdev);
131 static void si_fini_pg(struct radeon_device *rdev);
132 static void si_fini_cg(struct radeon_device *rdev);
133 static void si_rlc_stop(struct radeon_device *rdev);
134
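/*
 * RLC save/restore register list for Verde, consumed by the RLC microcode
 * during power gating.  Each pair below appears to encode an SE/instance
 * select in the upper 16 bits and a register dword offset in the lower
 * 16 bits, followed by a placeholder slot for the saved value.
 */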
135 static const u32 verde_rlc_save_restore_register_list[] =
136 {
137         (0x8000 << 16) | (0x98f4 >> 2),
138         0x00000000,
139         (0x8040 << 16) | (0x98f4 >> 2),
140         0x00000000,
141         (0x8000 << 16) | (0xe80 >> 2),
142         0x00000000,
143         (0x8040 << 16) | (0xe80 >> 2),
144         0x00000000,
145         (0x8000 << 16) | (0x89bc >> 2),
146         0x00000000,
147         (0x8040 << 16) | (0x89bc >> 2),
148         0x00000000,
149         (0x8000 << 16) | (0x8c1c >> 2),
150         0x00000000,
151         (0x8040 << 16) | (0x8c1c >> 2),
152         0x00000000,
153         (0x9c00 << 16) | (0x98f0 >> 2),
154         0x00000000,
155         (0x9c00 << 16) | (0xe7c >> 2),
156         0x00000000,
157         (0x8000 << 16) | (0x9148 >> 2),
158         0x00000000,
159         (0x8040 << 16) | (0x9148 >> 2),
160         0x00000000,
161         (0x9c00 << 16) | (0x9150 >> 2),
162         0x00000000,
163         (0x9c00 << 16) | (0x897c >> 2),
164         0x00000000,
165         (0x9c00 << 16) | (0x8d8c >> 2),
166         0x00000000,
167         (0x9c00 << 16) | (0xac54 >> 2),
168         0x00000000,
169         0x3,
170         (0x9c00 << 16) | (0x98f8 >> 2),
171         0x00000000,
172         (0x9c00 << 16) | (0x9910 >> 2),
173         0x00000000,
174         (0x9c00 << 16) | (0x9914 >> 2),
175         0x00000000,
176         (0x9c00 << 16) | (0x9918 >> 2),
177         0x00000000,
178         (0x9c00 << 16) | (0x991c >> 2),
179         0x00000000,
180         (0x9c00 << 16) | (0x9920 >> 2),
181         0x00000000,
182         (0x9c00 << 16) | (0x9924 >> 2),
183         0x00000000,
184         (0x9c00 << 16) | (0x9928 >> 2),
185         0x00000000,
186         (0x9c00 << 16) | (0x992c >> 2),
187         0x00000000,
188         (0x9c00 << 16) | (0x9930 >> 2),
189         0x00000000,
190         (0x9c00 << 16) | (0x9934 >> 2),
191         0x00000000,
192         (0x9c00 << 16) | (0x9938 >> 2),
193         0x00000000,
194         (0x9c00 << 16) | (0x993c >> 2),
195         0x00000000,
196         (0x9c00 << 16) | (0x9940 >> 2),
197         0x00000000,
198         (0x9c00 << 16) | (0x9944 >> 2),
199         0x00000000,
200         (0x9c00 << 16) | (0x9948 >> 2),
201         0x00000000,
202         (0x9c00 << 16) | (0x994c >> 2),
203         0x00000000,
204         (0x9c00 << 16) | (0x9950 >> 2),
205         0x00000000,
206         (0x9c00 << 16) | (0x9954 >> 2),
207         0x00000000,
208         (0x9c00 << 16) | (0x9958 >> 2),
209         0x00000000,
210         (0x9c00 << 16) | (0x995c >> 2),
211         0x00000000,
212         (0x9c00 << 16) | (0x9960 >> 2),
213         0x00000000,
214         (0x9c00 << 16) | (0x9964 >> 2),
215         0x00000000,
216         (0x9c00 << 16) | (0x9968 >> 2),
217         0x00000000,
218         (0x9c00 << 16) | (0x996c >> 2),
219         0x00000000,
220         (0x9c00 << 16) | (0x9970 >> 2),
221         0x00000000,
222         (0x9c00 << 16) | (0x9974 >> 2),
223         0x00000000,
224         (0x9c00 << 16) | (0x9978 >> 2),
225         0x00000000,
226         (0x9c00 << 16) | (0x997c >> 2),
227         0x00000000,
228         (0x9c00 << 16) | (0x9980 >> 2),
229         0x00000000,
230         (0x9c00 << 16) | (0x9984 >> 2),
231         0x00000000,
232         (0x9c00 << 16) | (0x9988 >> 2),
233         0x00000000,
234         (0x9c00 << 16) | (0x998c >> 2),
235         0x00000000,
236         (0x9c00 << 16) | (0x8c00 >> 2),
237         0x00000000,
238         (0x9c00 << 16) | (0x8c14 >> 2),
239         0x00000000,
240         (0x9c00 << 16) | (0x8c04 >> 2),
241         0x00000000,
242         (0x9c00 << 16) | (0x8c08 >> 2),
243         0x00000000,
244         (0x8000 << 16) | (0x9b7c >> 2),
245         0x00000000,
246         (0x8040 << 16) | (0x9b7c >> 2),
247         0x00000000,
248         (0x8000 << 16) | (0xe84 >> 2),
249         0x00000000,
250         (0x8040 << 16) | (0xe84 >> 2),
251         0x00000000,
252         (0x8000 << 16) | (0x89c0 >> 2),
253         0x00000000,
254         (0x8040 << 16) | (0x89c0 >> 2),
255         0x00000000,
256         (0x8000 << 16) | (0x914c >> 2),
257         0x00000000,
258         (0x8040 << 16) | (0x914c >> 2),
259         0x00000000,
260         (0x8000 << 16) | (0x8c20 >> 2),
261         0x00000000,
262         (0x8040 << 16) | (0x8c20 >> 2),
263         0x00000000,
264         (0x8000 << 16) | (0x9354 >> 2),
265         0x00000000,
266         (0x8040 << 16) | (0x9354 >> 2),
267         0x00000000,
268         (0x9c00 << 16) | (0x9060 >> 2),
269         0x00000000,
270         (0x9c00 << 16) | (0x9364 >> 2),
271         0x00000000,
272         (0x9c00 << 16) | (0x9100 >> 2),
273         0x00000000,
274         (0x9c00 << 16) | (0x913c >> 2),
275         0x00000000,
276         (0x8000 << 16) | (0x90e0 >> 2),
277         0x00000000,
278         (0x8000 << 16) | (0x90e4 >> 2),
279         0x00000000,
280         (0x8000 << 16) | (0x90e8 >> 2),
281         0x00000000,
282         (0x8040 << 16) | (0x90e0 >> 2),
283         0x00000000,
284         (0x8040 << 16) | (0x90e4 >> 2),
285         0x00000000,
286         (0x8040 << 16) | (0x90e8 >> 2),
287         0x00000000,
288         (0x9c00 << 16) | (0x8bcc >> 2),
289         0x00000000,
290         (0x9c00 << 16) | (0x8b24 >> 2),
291         0x00000000,
292         (0x9c00 << 16) | (0x88c4 >> 2),
293         0x00000000,
294         (0x9c00 << 16) | (0x8e50 >> 2),
295         0x00000000,
296         (0x9c00 << 16) | (0x8c0c >> 2),
297         0x00000000,
298         (0x9c00 << 16) | (0x8e58 >> 2),
299         0x00000000,
300         (0x9c00 << 16) | (0x8e5c >> 2),
301         0x00000000,
302         (0x9c00 << 16) | (0x9508 >> 2),
303         0x00000000,
304         (0x9c00 << 16) | (0x950c >> 2),
305         0x00000000,
306         (0x9c00 << 16) | (0x9494 >> 2),
307         0x00000000,
308         (0x9c00 << 16) | (0xac0c >> 2),
309         0x00000000,
310         (0x9c00 << 16) | (0xac10 >> 2),
311         0x00000000,
312         (0x9c00 << 16) | (0xac14 >> 2),
313         0x00000000,
314         (0x9c00 << 16) | (0xae00 >> 2),
315         0x00000000,
316         (0x9c00 << 16) | (0xac08 >> 2),
317         0x00000000,
318         (0x9c00 << 16) | (0x88d4 >> 2),
319         0x00000000,
320         (0x9c00 << 16) | (0x88c8 >> 2),
321         0x00000000,
322         (0x9c00 << 16) | (0x88cc >> 2),
323         0x00000000,
324         (0x9c00 << 16) | (0x89b0 >> 2),
325         0x00000000,
326         (0x9c00 << 16) | (0x8b10 >> 2),
327         0x00000000,
328         (0x9c00 << 16) | (0x8a14 >> 2),
329         0x00000000,
330         (0x9c00 << 16) | (0x9830 >> 2),
331         0x00000000,
332         (0x9c00 << 16) | (0x9834 >> 2),
333         0x00000000,
334         (0x9c00 << 16) | (0x9838 >> 2),
335         0x00000000,
336         (0x9c00 << 16) | (0x9a10 >> 2),
337         0x00000000,
338         (0x8000 << 16) | (0x9870 >> 2),
339         0x00000000,
340         (0x8000 << 16) | (0x9874 >> 2),
341         0x00000000,
342         (0x8001 << 16) | (0x9870 >> 2),
343         0x00000000,
344         (0x8001 << 16) | (0x9874 >> 2),
345         0x00000000,
346         (0x8040 << 16) | (0x9870 >> 2),
347         0x00000000,
348         (0x8040 << 16) | (0x9874 >> 2),
349         0x00000000,
350         (0x8041 << 16) | (0x9870 >> 2),
351         0x00000000,
352         (0x8041 << 16) | (0x9874 >> 2),
353         0x00000000,
354         0x00000000
355 };
356
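/*
 * The register tables below (golden, mgcg/cgcg init and pg init) are
 * {register offset, mask, value} triples applied by
 * radeon_program_register_sequence(): a mask of 0xffffffff overwrites the
 * whole register, otherwise only the bits set in the mask are replaced
 * with the new value.
 */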
357 static const u32 tahiti_golden_rlc_registers[] =
358 {
359         0xc424, 0xffffffff, 0x00601005,
360         0xc47c, 0xffffffff, 0x10104040,
361         0xc488, 0xffffffff, 0x0100000a,
362         0xc314, 0xffffffff, 0x00000800,
363         0xc30c, 0xffffffff, 0x800000f4,
364         0xf4a8, 0xffffffff, 0x00000000
365 };
366
367 static const u32 tahiti_golden_registers[] =
368 {
369         0x9a10, 0x00010000, 0x00018208,
370         0x9830, 0xffffffff, 0x00000000,
371         0x9834, 0xf00fffff, 0x00000400,
372         0x9838, 0x0002021c, 0x00020200,
373         0xc78, 0x00000080, 0x00000000,
374         0xd030, 0x000300c0, 0x00800040,
375         0xd830, 0x000300c0, 0x00800040,
376         0x5bb0, 0x000000f0, 0x00000070,
377         0x5bc0, 0x00200000, 0x50100000,
378         0x7030, 0x31000311, 0x00000011,
379         0x277c, 0x00000003, 0x000007ff,
380         0x240c, 0x000007ff, 0x00000000,
381         0x8a14, 0xf000001f, 0x00000007,
382         0x8b24, 0xffffffff, 0x00ffffff,
383         0x8b10, 0x0000ff0f, 0x00000000,
384         0x28a4c, 0x07ffffff, 0x4e000000,
385         0x28350, 0x3f3f3fff, 0x2a00126a,
386         0x30, 0x000000ff, 0x0040,
387         0x34, 0x00000040, 0x00004040,
388         0x9100, 0x07ffffff, 0x03000000,
389         0x8e88, 0x01ff1f3f, 0x00000000,
390         0x8e84, 0x01ff1f3f, 0x00000000,
391         0x9060, 0x0000007f, 0x00000020,
392         0x9508, 0x00010000, 0x00010000,
393         0xac14, 0x00000200, 0x000002fb,
394         0xac10, 0xffffffff, 0x0000543b,
395         0xac0c, 0xffffffff, 0xa9210876,
396         0x88d0, 0xffffffff, 0x000fff40,
397         0x88d4, 0x0000001f, 0x00000010,
398         0x1410, 0x20000000, 0x20fffed8,
399         0x15c0, 0x000c0fc0, 0x000c0400
400 };
401
402 static const u32 tahiti_golden_registers2[] =
403 {
404         0xc64, 0x00000001, 0x00000001
405 };
406
407 static const u32 pitcairn_golden_rlc_registers[] =
408 {
409         0xc424, 0xffffffff, 0x00601004,
410         0xc47c, 0xffffffff, 0x10102020,
411         0xc488, 0xffffffff, 0x01000020,
412         0xc314, 0xffffffff, 0x00000800,
413         0xc30c, 0xffffffff, 0x800000a4
414 };
415
416 static const u32 pitcairn_golden_registers[] =
417 {
418         0x9a10, 0x00010000, 0x00018208,
419         0x9830, 0xffffffff, 0x00000000,
420         0x9834, 0xf00fffff, 0x00000400,
421         0x9838, 0x0002021c, 0x00020200,
422         0xc78, 0x00000080, 0x00000000,
423         0xd030, 0x000300c0, 0x00800040,
424         0xd830, 0x000300c0, 0x00800040,
425         0x5bb0, 0x000000f0, 0x00000070,
426         0x5bc0, 0x00200000, 0x50100000,
427         0x7030, 0x31000311, 0x00000011,
428         0x2ae4, 0x00073ffe, 0x000022a2,
429         0x240c, 0x000007ff, 0x00000000,
430         0x8a14, 0xf000001f, 0x00000007,
431         0x8b24, 0xffffffff, 0x00ffffff,
432         0x8b10, 0x0000ff0f, 0x00000000,
433         0x28a4c, 0x07ffffff, 0x4e000000,
434         0x28350, 0x3f3f3fff, 0x2a00126a,
435         0x30, 0x000000ff, 0x0040,
436         0x34, 0x00000040, 0x00004040,
437         0x9100, 0x07ffffff, 0x03000000,
438         0x9060, 0x0000007f, 0x00000020,
439         0x9508, 0x00010000, 0x00010000,
440         0xac14, 0x000003ff, 0x000000f7,
441         0xac10, 0xffffffff, 0x00000000,
442         0xac0c, 0xffffffff, 0x32761054,
443         0x88d4, 0x0000001f, 0x00000010,
444         0x15c0, 0x000c0fc0, 0x000c0400
445 };
446
447 static const u32 verde_golden_rlc_registers[] =
448 {
449         0xc424, 0xffffffff, 0x033f1005,
450         0xc47c, 0xffffffff, 0x10808020,
451         0xc488, 0xffffffff, 0x00800008,
452         0xc314, 0xffffffff, 0x00001000,
453         0xc30c, 0xffffffff, 0x80010014
454 };
455
456 static const u32 verde_golden_registers[] =
457 {
458         0x9a10, 0x00010000, 0x00018208,
459         0x9830, 0xffffffff, 0x00000000,
460         0x9834, 0xf00fffff, 0x00000400,
461         0x9838, 0x0002021c, 0x00020200,
462         0xc78, 0x00000080, 0x00000000,
463         0xd030, 0x000300c0, 0x00800040,
464         0xd030, 0x000300c0, 0x00800040,
465         0xd830, 0x000300c0, 0x00800040,
466         0xd830, 0x000300c0, 0x00800040,
467         0x5bb0, 0x000000f0, 0x00000070,
468         0x5bc0, 0x00200000, 0x50100000,
469         0x7030, 0x31000311, 0x00000011,
470         0x2ae4, 0x00073ffe, 0x000022a2,
471         0x2ae4, 0x00073ffe, 0x000022a2,
472         0x2ae4, 0x00073ffe, 0x000022a2,
473         0x240c, 0x000007ff, 0x00000000,
474         0x240c, 0x000007ff, 0x00000000,
475         0x240c, 0x000007ff, 0x00000000,
476         0x8a14, 0xf000001f, 0x00000007,
477         0x8a14, 0xf000001f, 0x00000007,
478         0x8a14, 0xf000001f, 0x00000007,
479         0x8b24, 0xffffffff, 0x00ffffff,
480         0x8b10, 0x0000ff0f, 0x00000000,
481         0x28a4c, 0x07ffffff, 0x4e000000,
482         0x28350, 0x3f3f3fff, 0x0000124a,
483         0x28350, 0x3f3f3fff, 0x0000124a,
484         0x28350, 0x3f3f3fff, 0x0000124a,
485         0x30, 0x000000ff, 0x0040,
486         0x34, 0x00000040, 0x00004040,
487         0x9100, 0x07ffffff, 0x03000000,
488         0x9100, 0x07ffffff, 0x03000000,
489         0x8e88, 0x01ff1f3f, 0x00000000,
490         0x8e88, 0x01ff1f3f, 0x00000000,
491         0x8e88, 0x01ff1f3f, 0x00000000,
492         0x8e84, 0x01ff1f3f, 0x00000000,
493         0x8e84, 0x01ff1f3f, 0x00000000,
494         0x8e84, 0x01ff1f3f, 0x00000000,
495         0x9060, 0x0000007f, 0x00000020,
496         0x9508, 0x00010000, 0x00010000,
497         0xac14, 0x000003ff, 0x00000003,
498         0xac14, 0x000003ff, 0x00000003,
499         0xac14, 0x000003ff, 0x00000003,
500         0xac10, 0xffffffff, 0x00000000,
501         0xac10, 0xffffffff, 0x00000000,
502         0xac10, 0xffffffff, 0x00000000,
503         0xac0c, 0xffffffff, 0x00001032,
504         0xac0c, 0xffffffff, 0x00001032,
505         0xac0c, 0xffffffff, 0x00001032,
506         0x88d4, 0x0000001f, 0x00000010,
507         0x88d4, 0x0000001f, 0x00000010,
508         0x88d4, 0x0000001f, 0x00000010,
509         0x15c0, 0x000c0fc0, 0x000c0400
510 };
511
512 static const u32 oland_golden_rlc_registers[] =
513 {
514         0xc424, 0xffffffff, 0x00601005,
515         0xc47c, 0xffffffff, 0x10104040,
516         0xc488, 0xffffffff, 0x0100000a,
517         0xc314, 0xffffffff, 0x00000800,
518         0xc30c, 0xffffffff, 0x800000f4
519 };
520
521 static const u32 oland_golden_registers[] =
522 {
523         0x9a10, 0x00010000, 0x00018208,
524         0x9830, 0xffffffff, 0x00000000,
525         0x9834, 0xf00fffff, 0x00000400,
526         0x9838, 0x0002021c, 0x00020200,
527         0xc78, 0x00000080, 0x00000000,
528         0xd030, 0x000300c0, 0x00800040,
529         0xd830, 0x000300c0, 0x00800040,
530         0x5bb0, 0x000000f0, 0x00000070,
531         0x5bc0, 0x00200000, 0x50100000,
532         0x7030, 0x31000311, 0x00000011,
533         0x2ae4, 0x00073ffe, 0x000022a2,
534         0x240c, 0x000007ff, 0x00000000,
535         0x8a14, 0xf000001f, 0x00000007,
536         0x8b24, 0xffffffff, 0x00ffffff,
537         0x8b10, 0x0000ff0f, 0x00000000,
538         0x28a4c, 0x07ffffff, 0x4e000000,
539         0x28350, 0x3f3f3fff, 0x00000082,
540         0x30, 0x000000ff, 0x0040,
541         0x34, 0x00000040, 0x00004040,
542         0x9100, 0x07ffffff, 0x03000000,
543         0x9060, 0x0000007f, 0x00000020,
544         0x9508, 0x00010000, 0x00010000,
545         0xac14, 0x000003ff, 0x000000f3,
546         0xac10, 0xffffffff, 0x00000000,
547         0xac0c, 0xffffffff, 0x00003210,
548         0x88d4, 0x0000001f, 0x00000010,
549         0x15c0, 0x000c0fc0, 0x000c0400
550 };
551
552 static const u32 hainan_golden_registers[] =
553 {
554         0x9a10, 0x00010000, 0x00018208,
555         0x9830, 0xffffffff, 0x00000000,
556         0x9834, 0xf00fffff, 0x00000400,
557         0x9838, 0x0002021c, 0x00020200,
558         0xd0c0, 0xff000fff, 0x00000100,
559         0xd030, 0x000300c0, 0x00800040,
560         0xd8c0, 0xff000fff, 0x00000100,
561         0xd830, 0x000300c0, 0x00800040,
562         0x2ae4, 0x00073ffe, 0x000022a2,
563         0x240c, 0x000007ff, 0x00000000,
564         0x8a14, 0xf000001f, 0x00000007,
565         0x8b24, 0xffffffff, 0x00ffffff,
566         0x8b10, 0x0000ff0f, 0x00000000,
567         0x28a4c, 0x07ffffff, 0x4e000000,
568         0x28350, 0x3f3f3fff, 0x00000000,
569         0x30, 0x000000ff, 0x0040,
570         0x34, 0x00000040, 0x00004040,
571         0x9100, 0x03e00000, 0x03600000,
572         0x9060, 0x0000007f, 0x00000020,
573         0x9508, 0x00010000, 0x00010000,
574         0xac14, 0x000003ff, 0x000000f1,
575         0xac10, 0xffffffff, 0x00000000,
576         0xac0c, 0xffffffff, 0x00003210,
577         0x88d4, 0x0000001f, 0x00000010,
578         0x15c0, 0x000c0fc0, 0x000c0400
579 };
580
581 static const u32 hainan_golden_registers2[] =
582 {
583         0x98f8, 0xffffffff, 0x02010001
584 };
585
586 static const u32 tahiti_mgcg_cgcg_init[] =
587 {
588         0xc400, 0xffffffff, 0xfffffffc,
589         0x802c, 0xffffffff, 0xe0000000,
590         0x9a60, 0xffffffff, 0x00000100,
591         0x92a4, 0xffffffff, 0x00000100,
592         0xc164, 0xffffffff, 0x00000100,
593         0x9774, 0xffffffff, 0x00000100,
594         0x8984, 0xffffffff, 0x06000100,
595         0x8a18, 0xffffffff, 0x00000100,
596         0x92a0, 0xffffffff, 0x00000100,
597         0xc380, 0xffffffff, 0x00000100,
598         0x8b28, 0xffffffff, 0x00000100,
599         0x9144, 0xffffffff, 0x00000100,
600         0x8d88, 0xffffffff, 0x00000100,
601         0x8d8c, 0xffffffff, 0x00000100,
602         0x9030, 0xffffffff, 0x00000100,
603         0x9034, 0xffffffff, 0x00000100,
604         0x9038, 0xffffffff, 0x00000100,
605         0x903c, 0xffffffff, 0x00000100,
606         0xad80, 0xffffffff, 0x00000100,
607         0xac54, 0xffffffff, 0x00000100,
608         0x897c, 0xffffffff, 0x06000100,
609         0x9868, 0xffffffff, 0x00000100,
610         0x9510, 0xffffffff, 0x00000100,
611         0xaf04, 0xffffffff, 0x00000100,
612         0xae04, 0xffffffff, 0x00000100,
613         0x949c, 0xffffffff, 0x00000100,
614         0x802c, 0xffffffff, 0xe0000000,
615         0x9160, 0xffffffff, 0x00010000,
616         0x9164, 0xffffffff, 0x00030002,
617         0x9168, 0xffffffff, 0x00040007,
618         0x916c, 0xffffffff, 0x00060005,
619         0x9170, 0xffffffff, 0x00090008,
620         0x9174, 0xffffffff, 0x00020001,
621         0x9178, 0xffffffff, 0x00040003,
622         0x917c, 0xffffffff, 0x00000007,
623         0x9180, 0xffffffff, 0x00060005,
624         0x9184, 0xffffffff, 0x00090008,
625         0x9188, 0xffffffff, 0x00030002,
626         0x918c, 0xffffffff, 0x00050004,
627         0x9190, 0xffffffff, 0x00000008,
628         0x9194, 0xffffffff, 0x00070006,
629         0x9198, 0xffffffff, 0x000a0009,
630         0x919c, 0xffffffff, 0x00040003,
631         0x91a0, 0xffffffff, 0x00060005,
632         0x91a4, 0xffffffff, 0x00000009,
633         0x91a8, 0xffffffff, 0x00080007,
634         0x91ac, 0xffffffff, 0x000b000a,
635         0x91b0, 0xffffffff, 0x00050004,
636         0x91b4, 0xffffffff, 0x00070006,
637         0x91b8, 0xffffffff, 0x0008000b,
638         0x91bc, 0xffffffff, 0x000a0009,
639         0x91c0, 0xffffffff, 0x000d000c,
640         0x91c4, 0xffffffff, 0x00060005,
641         0x91c8, 0xffffffff, 0x00080007,
642         0x91cc, 0xffffffff, 0x0000000b,
643         0x91d0, 0xffffffff, 0x000a0009,
644         0x91d4, 0xffffffff, 0x000d000c,
645         0x91d8, 0xffffffff, 0x00070006,
646         0x91dc, 0xffffffff, 0x00090008,
647         0x91e0, 0xffffffff, 0x0000000c,
648         0x91e4, 0xffffffff, 0x000b000a,
649         0x91e8, 0xffffffff, 0x000e000d,
650         0x91ec, 0xffffffff, 0x00080007,
651         0x91f0, 0xffffffff, 0x000a0009,
652         0x91f4, 0xffffffff, 0x0000000d,
653         0x91f8, 0xffffffff, 0x000c000b,
654         0x91fc, 0xffffffff, 0x000f000e,
655         0x9200, 0xffffffff, 0x00090008,
656         0x9204, 0xffffffff, 0x000b000a,
657         0x9208, 0xffffffff, 0x000c000f,
658         0x920c, 0xffffffff, 0x000e000d,
659         0x9210, 0xffffffff, 0x00110010,
660         0x9214, 0xffffffff, 0x000a0009,
661         0x9218, 0xffffffff, 0x000c000b,
662         0x921c, 0xffffffff, 0x0000000f,
663         0x9220, 0xffffffff, 0x000e000d,
664         0x9224, 0xffffffff, 0x00110010,
665         0x9228, 0xffffffff, 0x000b000a,
666         0x922c, 0xffffffff, 0x000d000c,
667         0x9230, 0xffffffff, 0x00000010,
668         0x9234, 0xffffffff, 0x000f000e,
669         0x9238, 0xffffffff, 0x00120011,
670         0x923c, 0xffffffff, 0x000c000b,
671         0x9240, 0xffffffff, 0x000e000d,
672         0x9244, 0xffffffff, 0x00000011,
673         0x9248, 0xffffffff, 0x0010000f,
674         0x924c, 0xffffffff, 0x00130012,
675         0x9250, 0xffffffff, 0x000d000c,
676         0x9254, 0xffffffff, 0x000f000e,
677         0x9258, 0xffffffff, 0x00100013,
678         0x925c, 0xffffffff, 0x00120011,
679         0x9260, 0xffffffff, 0x00150014,
680         0x9264, 0xffffffff, 0x000e000d,
681         0x9268, 0xffffffff, 0x0010000f,
682         0x926c, 0xffffffff, 0x00000013,
683         0x9270, 0xffffffff, 0x00120011,
684         0x9274, 0xffffffff, 0x00150014,
685         0x9278, 0xffffffff, 0x000f000e,
686         0x927c, 0xffffffff, 0x00110010,
687         0x9280, 0xffffffff, 0x00000014,
688         0x9284, 0xffffffff, 0x00130012,
689         0x9288, 0xffffffff, 0x00160015,
690         0x928c, 0xffffffff, 0x0010000f,
691         0x9290, 0xffffffff, 0x00120011,
692         0x9294, 0xffffffff, 0x00000015,
693         0x9298, 0xffffffff, 0x00140013,
694         0x929c, 0xffffffff, 0x00170016,
695         0x9150, 0xffffffff, 0x96940200,
696         0x8708, 0xffffffff, 0x00900100,
697         0xc478, 0xffffffff, 0x00000080,
698         0xc404, 0xffffffff, 0x0020003f,
699         0x30, 0xffffffff, 0x0000001c,
700         0x34, 0x000f0000, 0x000f0000,
701         0x160c, 0xffffffff, 0x00000100,
702         0x1024, 0xffffffff, 0x00000100,
703         0x102c, 0x00000101, 0x00000000,
704         0x20a8, 0xffffffff, 0x00000104,
705         0x264c, 0x000c0000, 0x000c0000,
706         0x2648, 0x000c0000, 0x000c0000,
707         0x55e4, 0xff000fff, 0x00000100,
708         0x55e8, 0x00000001, 0x00000001,
709         0x2f50, 0x00000001, 0x00000001,
710         0x30cc, 0xc0000fff, 0x00000104,
711         0xc1e4, 0x00000001, 0x00000001,
712         0xd0c0, 0xfffffff0, 0x00000100,
713         0xd8c0, 0xfffffff0, 0x00000100
714 };
715
716 static const u32 pitcairn_mgcg_cgcg_init[] =
717 {
718         0xc400, 0xffffffff, 0xfffffffc,
719         0x802c, 0xffffffff, 0xe0000000,
720         0x9a60, 0xffffffff, 0x00000100,
721         0x92a4, 0xffffffff, 0x00000100,
722         0xc164, 0xffffffff, 0x00000100,
723         0x9774, 0xffffffff, 0x00000100,
724         0x8984, 0xffffffff, 0x06000100,
725         0x8a18, 0xffffffff, 0x00000100,
726         0x92a0, 0xffffffff, 0x00000100,
727         0xc380, 0xffffffff, 0x00000100,
728         0x8b28, 0xffffffff, 0x00000100,
729         0x9144, 0xffffffff, 0x00000100,
730         0x8d88, 0xffffffff, 0x00000100,
731         0x8d8c, 0xffffffff, 0x00000100,
732         0x9030, 0xffffffff, 0x00000100,
733         0x9034, 0xffffffff, 0x00000100,
734         0x9038, 0xffffffff, 0x00000100,
735         0x903c, 0xffffffff, 0x00000100,
736         0xad80, 0xffffffff, 0x00000100,
737         0xac54, 0xffffffff, 0x00000100,
738         0x897c, 0xffffffff, 0x06000100,
739         0x9868, 0xffffffff, 0x00000100,
740         0x9510, 0xffffffff, 0x00000100,
741         0xaf04, 0xffffffff, 0x00000100,
742         0xae04, 0xffffffff, 0x00000100,
743         0x949c, 0xffffffff, 0x00000100,
744         0x802c, 0xffffffff, 0xe0000000,
745         0x9160, 0xffffffff, 0x00010000,
746         0x9164, 0xffffffff, 0x00030002,
747         0x9168, 0xffffffff, 0x00040007,
748         0x916c, 0xffffffff, 0x00060005,
749         0x9170, 0xffffffff, 0x00090008,
750         0x9174, 0xffffffff, 0x00020001,
751         0x9178, 0xffffffff, 0x00040003,
752         0x917c, 0xffffffff, 0x00000007,
753         0x9180, 0xffffffff, 0x00060005,
754         0x9184, 0xffffffff, 0x00090008,
755         0x9188, 0xffffffff, 0x00030002,
756         0x918c, 0xffffffff, 0x00050004,
757         0x9190, 0xffffffff, 0x00000008,
758         0x9194, 0xffffffff, 0x00070006,
759         0x9198, 0xffffffff, 0x000a0009,
760         0x919c, 0xffffffff, 0x00040003,
761         0x91a0, 0xffffffff, 0x00060005,
762         0x91a4, 0xffffffff, 0x00000009,
763         0x91a8, 0xffffffff, 0x00080007,
764         0x91ac, 0xffffffff, 0x000b000a,
765         0x91b0, 0xffffffff, 0x00050004,
766         0x91b4, 0xffffffff, 0x00070006,
767         0x91b8, 0xffffffff, 0x0008000b,
768         0x91bc, 0xffffffff, 0x000a0009,
769         0x91c0, 0xffffffff, 0x000d000c,
770         0x9200, 0xffffffff, 0x00090008,
771         0x9204, 0xffffffff, 0x000b000a,
772         0x9208, 0xffffffff, 0x000c000f,
773         0x920c, 0xffffffff, 0x000e000d,
774         0x9210, 0xffffffff, 0x00110010,
775         0x9214, 0xffffffff, 0x000a0009,
776         0x9218, 0xffffffff, 0x000c000b,
777         0x921c, 0xffffffff, 0x0000000f,
778         0x9220, 0xffffffff, 0x000e000d,
779         0x9224, 0xffffffff, 0x00110010,
780         0x9228, 0xffffffff, 0x000b000a,
781         0x922c, 0xffffffff, 0x000d000c,
782         0x9230, 0xffffffff, 0x00000010,
783         0x9234, 0xffffffff, 0x000f000e,
784         0x9238, 0xffffffff, 0x00120011,
785         0x923c, 0xffffffff, 0x000c000b,
786         0x9240, 0xffffffff, 0x000e000d,
787         0x9244, 0xffffffff, 0x00000011,
788         0x9248, 0xffffffff, 0x0010000f,
789         0x924c, 0xffffffff, 0x00130012,
790         0x9250, 0xffffffff, 0x000d000c,
791         0x9254, 0xffffffff, 0x000f000e,
792         0x9258, 0xffffffff, 0x00100013,
793         0x925c, 0xffffffff, 0x00120011,
794         0x9260, 0xffffffff, 0x00150014,
795         0x9150, 0xffffffff, 0x96940200,
796         0x8708, 0xffffffff, 0x00900100,
797         0xc478, 0xffffffff, 0x00000080,
798         0xc404, 0xffffffff, 0x0020003f,
799         0x30, 0xffffffff, 0x0000001c,
800         0x34, 0x000f0000, 0x000f0000,
801         0x160c, 0xffffffff, 0x00000100,
802         0x1024, 0xffffffff, 0x00000100,
803         0x102c, 0x00000101, 0x00000000,
804         0x20a8, 0xffffffff, 0x00000104,
805         0x55e4, 0xff000fff, 0x00000100,
806         0x55e8, 0x00000001, 0x00000001,
807         0x2f50, 0x00000001, 0x00000001,
808         0x30cc, 0xc0000fff, 0x00000104,
809         0xc1e4, 0x00000001, 0x00000001,
810         0xd0c0, 0xfffffff0, 0x00000100,
811         0xd8c0, 0xfffffff0, 0x00000100
812 };
813
814 static const u32 verde_mgcg_cgcg_init[] =
815 {
816         0xc400, 0xffffffff, 0xfffffffc,
817         0x802c, 0xffffffff, 0xe0000000,
818         0x9a60, 0xffffffff, 0x00000100,
819         0x92a4, 0xffffffff, 0x00000100,
820         0xc164, 0xffffffff, 0x00000100,
821         0x9774, 0xffffffff, 0x00000100,
822         0x8984, 0xffffffff, 0x06000100,
823         0x8a18, 0xffffffff, 0x00000100,
824         0x92a0, 0xffffffff, 0x00000100,
825         0xc380, 0xffffffff, 0x00000100,
826         0x8b28, 0xffffffff, 0x00000100,
827         0x9144, 0xffffffff, 0x00000100,
828         0x8d88, 0xffffffff, 0x00000100,
829         0x8d8c, 0xffffffff, 0x00000100,
830         0x9030, 0xffffffff, 0x00000100,
831         0x9034, 0xffffffff, 0x00000100,
832         0x9038, 0xffffffff, 0x00000100,
833         0x903c, 0xffffffff, 0x00000100,
834         0xad80, 0xffffffff, 0x00000100,
835         0xac54, 0xffffffff, 0x00000100,
836         0x897c, 0xffffffff, 0x06000100,
837         0x9868, 0xffffffff, 0x00000100,
838         0x9510, 0xffffffff, 0x00000100,
839         0xaf04, 0xffffffff, 0x00000100,
840         0xae04, 0xffffffff, 0x00000100,
841         0x949c, 0xffffffff, 0x00000100,
842         0x802c, 0xffffffff, 0xe0000000,
843         0x9160, 0xffffffff, 0x00010000,
844         0x9164, 0xffffffff, 0x00030002,
845         0x9168, 0xffffffff, 0x00040007,
846         0x916c, 0xffffffff, 0x00060005,
847         0x9170, 0xffffffff, 0x00090008,
848         0x9174, 0xffffffff, 0x00020001,
849         0x9178, 0xffffffff, 0x00040003,
850         0x917c, 0xffffffff, 0x00000007,
851         0x9180, 0xffffffff, 0x00060005,
852         0x9184, 0xffffffff, 0x00090008,
853         0x9188, 0xffffffff, 0x00030002,
854         0x918c, 0xffffffff, 0x00050004,
855         0x9190, 0xffffffff, 0x00000008,
856         0x9194, 0xffffffff, 0x00070006,
857         0x9198, 0xffffffff, 0x000a0009,
858         0x919c, 0xffffffff, 0x00040003,
859         0x91a0, 0xffffffff, 0x00060005,
860         0x91a4, 0xffffffff, 0x00000009,
861         0x91a8, 0xffffffff, 0x00080007,
862         0x91ac, 0xffffffff, 0x000b000a,
863         0x91b0, 0xffffffff, 0x00050004,
864         0x91b4, 0xffffffff, 0x00070006,
865         0x91b8, 0xffffffff, 0x0008000b,
866         0x91bc, 0xffffffff, 0x000a0009,
867         0x91c0, 0xffffffff, 0x000d000c,
868         0x9200, 0xffffffff, 0x00090008,
869         0x9204, 0xffffffff, 0x000b000a,
870         0x9208, 0xffffffff, 0x000c000f,
871         0x920c, 0xffffffff, 0x000e000d,
872         0x9210, 0xffffffff, 0x00110010,
873         0x9214, 0xffffffff, 0x000a0009,
874         0x9218, 0xffffffff, 0x000c000b,
875         0x921c, 0xffffffff, 0x0000000f,
876         0x9220, 0xffffffff, 0x000e000d,
877         0x9224, 0xffffffff, 0x00110010,
878         0x9228, 0xffffffff, 0x000b000a,
879         0x922c, 0xffffffff, 0x000d000c,
880         0x9230, 0xffffffff, 0x00000010,
881         0x9234, 0xffffffff, 0x000f000e,
882         0x9238, 0xffffffff, 0x00120011,
883         0x923c, 0xffffffff, 0x000c000b,
884         0x9240, 0xffffffff, 0x000e000d,
885         0x9244, 0xffffffff, 0x00000011,
886         0x9248, 0xffffffff, 0x0010000f,
887         0x924c, 0xffffffff, 0x00130012,
888         0x9250, 0xffffffff, 0x000d000c,
889         0x9254, 0xffffffff, 0x000f000e,
890         0x9258, 0xffffffff, 0x00100013,
891         0x925c, 0xffffffff, 0x00120011,
892         0x9260, 0xffffffff, 0x00150014,
893         0x9150, 0xffffffff, 0x96940200,
894         0x8708, 0xffffffff, 0x00900100,
895         0xc478, 0xffffffff, 0x00000080,
896         0xc404, 0xffffffff, 0x0020003f,
897         0x30, 0xffffffff, 0x0000001c,
898         0x34, 0x000f0000, 0x000f0000,
899         0x160c, 0xffffffff, 0x00000100,
900         0x1024, 0xffffffff, 0x00000100,
901         0x102c, 0x00000101, 0x00000000,
902         0x20a8, 0xffffffff, 0x00000104,
903         0x264c, 0x000c0000, 0x000c0000,
904         0x2648, 0x000c0000, 0x000c0000,
905         0x55e4, 0xff000fff, 0x00000100,
906         0x55e8, 0x00000001, 0x00000001,
907         0x2f50, 0x00000001, 0x00000001,
908         0x30cc, 0xc0000fff, 0x00000104,
909         0xc1e4, 0x00000001, 0x00000001,
910         0xd0c0, 0xfffffff0, 0x00000100,
911         0xd8c0, 0xfffffff0, 0x00000100
912 };
913
914 static const u32 oland_mgcg_cgcg_init[] =
915 {
916         0xc400, 0xffffffff, 0xfffffffc,
917         0x802c, 0xffffffff, 0xe0000000,
918         0x9a60, 0xffffffff, 0x00000100,
919         0x92a4, 0xffffffff, 0x00000100,
920         0xc164, 0xffffffff, 0x00000100,
921         0x9774, 0xffffffff, 0x00000100,
922         0x8984, 0xffffffff, 0x06000100,
923         0x8a18, 0xffffffff, 0x00000100,
924         0x92a0, 0xffffffff, 0x00000100,
925         0xc380, 0xffffffff, 0x00000100,
926         0x8b28, 0xffffffff, 0x00000100,
927         0x9144, 0xffffffff, 0x00000100,
928         0x8d88, 0xffffffff, 0x00000100,
929         0x8d8c, 0xffffffff, 0x00000100,
930         0x9030, 0xffffffff, 0x00000100,
931         0x9034, 0xffffffff, 0x00000100,
932         0x9038, 0xffffffff, 0x00000100,
933         0x903c, 0xffffffff, 0x00000100,
934         0xad80, 0xffffffff, 0x00000100,
935         0xac54, 0xffffffff, 0x00000100,
936         0x897c, 0xffffffff, 0x06000100,
937         0x9868, 0xffffffff, 0x00000100,
938         0x9510, 0xffffffff, 0x00000100,
939         0xaf04, 0xffffffff, 0x00000100,
940         0xae04, 0xffffffff, 0x00000100,
941         0x949c, 0xffffffff, 0x00000100,
942         0x802c, 0xffffffff, 0xe0000000,
943         0x9160, 0xffffffff, 0x00010000,
944         0x9164, 0xffffffff, 0x00030002,
945         0x9168, 0xffffffff, 0x00040007,
946         0x916c, 0xffffffff, 0x00060005,
947         0x9170, 0xffffffff, 0x00090008,
948         0x9174, 0xffffffff, 0x00020001,
949         0x9178, 0xffffffff, 0x00040003,
950         0x917c, 0xffffffff, 0x00000007,
951         0x9180, 0xffffffff, 0x00060005,
952         0x9184, 0xffffffff, 0x00090008,
953         0x9188, 0xffffffff, 0x00030002,
954         0x918c, 0xffffffff, 0x00050004,
955         0x9190, 0xffffffff, 0x00000008,
956         0x9194, 0xffffffff, 0x00070006,
957         0x9198, 0xffffffff, 0x000a0009,
958         0x919c, 0xffffffff, 0x00040003,
959         0x91a0, 0xffffffff, 0x00060005,
960         0x91a4, 0xffffffff, 0x00000009,
961         0x91a8, 0xffffffff, 0x00080007,
962         0x91ac, 0xffffffff, 0x000b000a,
963         0x91b0, 0xffffffff, 0x00050004,
964         0x91b4, 0xffffffff, 0x00070006,
965         0x91b8, 0xffffffff, 0x0008000b,
966         0x91bc, 0xffffffff, 0x000a0009,
967         0x91c0, 0xffffffff, 0x000d000c,
968         0x91c4, 0xffffffff, 0x00060005,
969         0x91c8, 0xffffffff, 0x00080007,
970         0x91cc, 0xffffffff, 0x0000000b,
971         0x91d0, 0xffffffff, 0x000a0009,
972         0x91d4, 0xffffffff, 0x000d000c,
973         0x9150, 0xffffffff, 0x96940200,
974         0x8708, 0xffffffff, 0x00900100,
975         0xc478, 0xffffffff, 0x00000080,
976         0xc404, 0xffffffff, 0x0020003f,
977         0x30, 0xffffffff, 0x0000001c,
978         0x34, 0x000f0000, 0x000f0000,
979         0x160c, 0xffffffff, 0x00000100,
980         0x1024, 0xffffffff, 0x00000100,
981         0x102c, 0x00000101, 0x00000000,
982         0x20a8, 0xffffffff, 0x00000104,
983         0x264c, 0x000c0000, 0x000c0000,
984         0x2648, 0x000c0000, 0x000c0000,
985         0x55e4, 0xff000fff, 0x00000100,
986         0x55e8, 0x00000001, 0x00000001,
987         0x2f50, 0x00000001, 0x00000001,
988         0x30cc, 0xc0000fff, 0x00000104,
989         0xc1e4, 0x00000001, 0x00000001,
990         0xd0c0, 0xfffffff0, 0x00000100,
991         0xd8c0, 0xfffffff0, 0x00000100
992 };
993
994 static const u32 hainan_mgcg_cgcg_init[] =
995 {
996         0xc400, 0xffffffff, 0xfffffffc,
997         0x802c, 0xffffffff, 0xe0000000,
998         0x9a60, 0xffffffff, 0x00000100,
999         0x92a4, 0xffffffff, 0x00000100,
1000         0xc164, 0xffffffff, 0x00000100,
1001         0x9774, 0xffffffff, 0x00000100,
1002         0x8984, 0xffffffff, 0x06000100,
1003         0x8a18, 0xffffffff, 0x00000100,
1004         0x92a0, 0xffffffff, 0x00000100,
1005         0xc380, 0xffffffff, 0x00000100,
1006         0x8b28, 0xffffffff, 0x00000100,
1007         0x9144, 0xffffffff, 0x00000100,
1008         0x8d88, 0xffffffff, 0x00000100,
1009         0x8d8c, 0xffffffff, 0x00000100,
1010         0x9030, 0xffffffff, 0x00000100,
1011         0x9034, 0xffffffff, 0x00000100,
1012         0x9038, 0xffffffff, 0x00000100,
1013         0x903c, 0xffffffff, 0x00000100,
1014         0xad80, 0xffffffff, 0x00000100,
1015         0xac54, 0xffffffff, 0x00000100,
1016         0x897c, 0xffffffff, 0x06000100,
1017         0x9868, 0xffffffff, 0x00000100,
1018         0x9510, 0xffffffff, 0x00000100,
1019         0xaf04, 0xffffffff, 0x00000100,
1020         0xae04, 0xffffffff, 0x00000100,
1021         0x949c, 0xffffffff, 0x00000100,
1022         0x802c, 0xffffffff, 0xe0000000,
1023         0x9160, 0xffffffff, 0x00010000,
1024         0x9164, 0xffffffff, 0x00030002,
1025         0x9168, 0xffffffff, 0x00040007,
1026         0x916c, 0xffffffff, 0x00060005,
1027         0x9170, 0xffffffff, 0x00090008,
1028         0x9174, 0xffffffff, 0x00020001,
1029         0x9178, 0xffffffff, 0x00040003,
1030         0x917c, 0xffffffff, 0x00000007,
1031         0x9180, 0xffffffff, 0x00060005,
1032         0x9184, 0xffffffff, 0x00090008,
1033         0x9188, 0xffffffff, 0x00030002,
1034         0x918c, 0xffffffff, 0x00050004,
1035         0x9190, 0xffffffff, 0x00000008,
1036         0x9194, 0xffffffff, 0x00070006,
1037         0x9198, 0xffffffff, 0x000a0009,
1038         0x919c, 0xffffffff, 0x00040003,
1039         0x91a0, 0xffffffff, 0x00060005,
1040         0x91a4, 0xffffffff, 0x00000009,
1041         0x91a8, 0xffffffff, 0x00080007,
1042         0x91ac, 0xffffffff, 0x000b000a,
1043         0x91b0, 0xffffffff, 0x00050004,
1044         0x91b4, 0xffffffff, 0x00070006,
1045         0x91b8, 0xffffffff, 0x0008000b,
1046         0x91bc, 0xffffffff, 0x000a0009,
1047         0x91c0, 0xffffffff, 0x000d000c,
1048         0x91c4, 0xffffffff, 0x00060005,
1049         0x91c8, 0xffffffff, 0x00080007,
1050         0x91cc, 0xffffffff, 0x0000000b,
1051         0x91d0, 0xffffffff, 0x000a0009,
1052         0x91d4, 0xffffffff, 0x000d000c,
1053         0x9150, 0xffffffff, 0x96940200,
1054         0x8708, 0xffffffff, 0x00900100,
1055         0xc478, 0xffffffff, 0x00000080,
1056         0xc404, 0xffffffff, 0x0020003f,
1057         0x30, 0xffffffff, 0x0000001c,
1058         0x34, 0x000f0000, 0x000f0000,
1059         0x160c, 0xffffffff, 0x00000100,
1060         0x1024, 0xffffffff, 0x00000100,
1061         0x20a8, 0xffffffff, 0x00000104,
1062         0x264c, 0x000c0000, 0x000c0000,
1063         0x2648, 0x000c0000, 0x000c0000,
1064         0x2f50, 0x00000001, 0x00000001,
1065         0x30cc, 0xc0000fff, 0x00000104,
1066         0xc1e4, 0x00000001, 0x00000001,
1067         0xd0c0, 0xfffffff0, 0x00000100,
1068         0xd8c0, 0xfffffff0, 0x00000100
1069 };
1070
1071 static const u32 verde_pg_init[] =
1072 {
1073         0x353c, 0xffffffff, 0x40000,
1074         0x3538, 0xffffffff, 0x200010ff,
1075         0x353c, 0xffffffff, 0x0,
1076         0x353c, 0xffffffff, 0x0,
1077         0x353c, 0xffffffff, 0x0,
1078         0x353c, 0xffffffff, 0x0,
1079         0x353c, 0xffffffff, 0x0,
1080         0x353c, 0xffffffff, 0x7007,
1081         0x3538, 0xffffffff, 0x300010ff,
1082         0x353c, 0xffffffff, 0x0,
1083         0x353c, 0xffffffff, 0x0,
1084         0x353c, 0xffffffff, 0x0,
1085         0x353c, 0xffffffff, 0x0,
1086         0x353c, 0xffffffff, 0x0,
1087         0x353c, 0xffffffff, 0x400000,
1088         0x3538, 0xffffffff, 0x100010ff,
1089         0x353c, 0xffffffff, 0x0,
1090         0x353c, 0xffffffff, 0x0,
1091         0x353c, 0xffffffff, 0x0,
1092         0x353c, 0xffffffff, 0x0,
1093         0x353c, 0xffffffff, 0x0,
1094         0x353c, 0xffffffff, 0x120200,
1095         0x3538, 0xffffffff, 0x500010ff,
1096         0x353c, 0xffffffff, 0x0,
1097         0x353c, 0xffffffff, 0x0,
1098         0x353c, 0xffffffff, 0x0,
1099         0x353c, 0xffffffff, 0x0,
1100         0x353c, 0xffffffff, 0x0,
1101         0x353c, 0xffffffff, 0x1e1e16,
1102         0x3538, 0xffffffff, 0x600010ff,
1103         0x353c, 0xffffffff, 0x0,
1104         0x353c, 0xffffffff, 0x0,
1105         0x353c, 0xffffffff, 0x0,
1106         0x353c, 0xffffffff, 0x0,
1107         0x353c, 0xffffffff, 0x0,
1108         0x353c, 0xffffffff, 0x171f1e,
1109         0x3538, 0xffffffff, 0x700010ff,
1110         0x353c, 0xffffffff, 0x0,
1111         0x353c, 0xffffffff, 0x0,
1112         0x353c, 0xffffffff, 0x0,
1113         0x353c, 0xffffffff, 0x0,
1114         0x353c, 0xffffffff, 0x0,
1115         0x353c, 0xffffffff, 0x0,
1116         0x3538, 0xffffffff, 0x9ff,
1117         0x3500, 0xffffffff, 0x0,
1118         0x3504, 0xffffffff, 0x10000800,
1119         0x3504, 0xffffffff, 0xf,
1120         0x3504, 0xffffffff, 0xf,
1121         0x3500, 0xffffffff, 0x4,
1122         0x3504, 0xffffffff, 0x1000051e,
1123         0x3504, 0xffffffff, 0xffff,
1124         0x3504, 0xffffffff, 0xffff,
1125         0x3500, 0xffffffff, 0x8,
1126         0x3504, 0xffffffff, 0x80500,
1127         0x3500, 0xffffffff, 0x12,
1128         0x3504, 0xffffffff, 0x9050c,
1129         0x3500, 0xffffffff, 0x1d,
1130         0x3504, 0xffffffff, 0xb052c,
1131         0x3500, 0xffffffff, 0x2a,
1132         0x3504, 0xffffffff, 0x1053e,
1133         0x3500, 0xffffffff, 0x2d,
1134         0x3504, 0xffffffff, 0x10546,
1135         0x3500, 0xffffffff, 0x30,
1136         0x3504, 0xffffffff, 0xa054e,
1137         0x3500, 0xffffffff, 0x3c,
1138         0x3504, 0xffffffff, 0x1055f,
1139         0x3500, 0xffffffff, 0x3f,
1140         0x3504, 0xffffffff, 0x10567,
1141         0x3500, 0xffffffff, 0x42,
1142         0x3504, 0xffffffff, 0x1056f,
1143         0x3500, 0xffffffff, 0x45,
1144         0x3504, 0xffffffff, 0x10572,
1145         0x3500, 0xffffffff, 0x48,
1146         0x3504, 0xffffffff, 0x20575,
1147         0x3500, 0xffffffff, 0x4c,
1148         0x3504, 0xffffffff, 0x190801,
1149         0x3500, 0xffffffff, 0x67,
1150         0x3504, 0xffffffff, 0x1082a,
1151         0x3500, 0xffffffff, 0x6a,
1152         0x3504, 0xffffffff, 0x1b082d,
1153         0x3500, 0xffffffff, 0x87,
1154         0x3504, 0xffffffff, 0x310851,
1155         0x3500, 0xffffffff, 0xba,
1156         0x3504, 0xffffffff, 0x891,
1157         0x3500, 0xffffffff, 0xbc,
1158         0x3504, 0xffffffff, 0x893,
1159         0x3500, 0xffffffff, 0xbe,
1160         0x3504, 0xffffffff, 0x20895,
1161         0x3500, 0xffffffff, 0xc2,
1162         0x3504, 0xffffffff, 0x20899,
1163         0x3500, 0xffffffff, 0xc6,
1164         0x3504, 0xffffffff, 0x2089d,
1165         0x3500, 0xffffffff, 0xca,
1166         0x3504, 0xffffffff, 0x8a1,
1167         0x3500, 0xffffffff, 0xcc,
1168         0x3504, 0xffffffff, 0x8a3,
1169         0x3500, 0xffffffff, 0xce,
1170         0x3504, 0xffffffff, 0x308a5,
1171         0x3500, 0xffffffff, 0xd3,
1172         0x3504, 0xffffffff, 0x6d08cd,
1173         0x3500, 0xffffffff, 0x142,
1174         0x3504, 0xffffffff, 0x2000095a,
1175         0x3504, 0xffffffff, 0x1,
1176         0x3500, 0xffffffff, 0x144,
1177         0x3504, 0xffffffff, 0x301f095b,
1178         0x3500, 0xffffffff, 0x165,
1179         0x3504, 0xffffffff, 0xc094d,
1180         0x3500, 0xffffffff, 0x173,
1181         0x3504, 0xffffffff, 0xf096d,
1182         0x3500, 0xffffffff, 0x184,
1183         0x3504, 0xffffffff, 0x15097f,
1184         0x3500, 0xffffffff, 0x19b,
1185         0x3504, 0xffffffff, 0xc0998,
1186         0x3500, 0xffffffff, 0x1a9,
1187         0x3504, 0xffffffff, 0x409a7,
1188         0x3500, 0xffffffff, 0x1af,
1189         0x3504, 0xffffffff, 0xcdc,
1190         0x3500, 0xffffffff, 0x1b1,
1191         0x3504, 0xffffffff, 0x800,
1192         0x3508, 0xffffffff, 0x6c9b2000,
1193         0x3510, 0xfc00, 0x2000,
1194         0x3544, 0xffffffff, 0xfc0,
1195         0x28d4, 0x00000100, 0x100
1196 };
1197
1198 static void si_init_golden_registers(struct radeon_device *rdev)
1199 {
1200         switch (rdev->family) {
1201         case CHIP_TAHITI:
1202                 radeon_program_register_sequence(rdev,
1203                                                  tahiti_golden_registers,
1204                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
1205                 radeon_program_register_sequence(rdev,
1206                                                  tahiti_golden_rlc_registers,
1207                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1208                 radeon_program_register_sequence(rdev,
1209                                                  tahiti_mgcg_cgcg_init,
1210                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1211                 radeon_program_register_sequence(rdev,
1212                                                  tahiti_golden_registers2,
1213                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1214                 break;
1215         case CHIP_PITCAIRN:
1216                 radeon_program_register_sequence(rdev,
1217                                                  pitcairn_golden_registers,
1218                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1219                 radeon_program_register_sequence(rdev,
1220                                                  pitcairn_golden_rlc_registers,
1221                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1222                 radeon_program_register_sequence(rdev,
1223                                                  pitcairn_mgcg_cgcg_init,
1224                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1225                 break;
1226         case CHIP_VERDE:
1227                 radeon_program_register_sequence(rdev,
1228                                                  verde_golden_registers,
1229                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
1230                 radeon_program_register_sequence(rdev,
1231                                                  verde_golden_rlc_registers,
1232                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1233                 radeon_program_register_sequence(rdev,
1234                                                  verde_mgcg_cgcg_init,
1235                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1236                 radeon_program_register_sequence(rdev,
1237                                                  verde_pg_init,
1238                                                  (const u32)ARRAY_SIZE(verde_pg_init));
1239                 break;
1240         case CHIP_OLAND:
1241                 radeon_program_register_sequence(rdev,
1242                                                  oland_golden_registers,
1243                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
1244                 radeon_program_register_sequence(rdev,
1245                                                  oland_golden_rlc_registers,
1246                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1247                 radeon_program_register_sequence(rdev,
1248                                                  oland_mgcg_cgcg_init,
1249                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1250                 break;
1251         case CHIP_HAINAN:
1252                 radeon_program_register_sequence(rdev,
1253                                                  hainan_golden_registers,
1254                                                  (const u32)ARRAY_SIZE(hainan_golden_registers));
1255                 radeon_program_register_sequence(rdev,
1256                                                  hainan_golden_registers2,
1257                                                  (const u32)ARRAY_SIZE(hainan_golden_registers2));
1258                 radeon_program_register_sequence(rdev,
1259                                                  hainan_mgcg_cgcg_init,
1260                                                  (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1261                 break;
1262         default:
1263                 break;
1264         }
1265 }
1266
1267 #define PCIE_BUS_CLK                10000
1268 #define TCLK                        (PCIE_BUS_CLK / 10)
1269
1270 /**
1271  * si_get_xclk - get the xclk
1272  *
1273  * @rdev: radeon_device pointer
1274  *
1275  * Returns the reference clock used by the gfx engine
1276  * (SI).
1277  */
1278 u32 si_get_xclk(struct radeon_device *rdev)
1279 {
1280         u32 reference_clock = rdev->clock.spll.reference_freq;
1281         u32 tmp;
1282
1283         tmp = RREG32(CG_CLKPIN_CNTL_2);
1284         if (tmp & MUX_TCLK_TO_XCLK)
1285                 return TCLK;
1286
1287         tmp = RREG32(CG_CLKPIN_CNTL);
1288         if (tmp & XTALIN_DIVIDE)
1289                 return reference_clock / 4;
1290
1291         return reference_clock;
1292 }
1293
1294 /* get temperature in millidegrees */
1295 int si_get_temp(struct radeon_device *rdev)
1296 {
1297         u32 temp;
1298         int actual_temp = 0;
1299
1300         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1301                 CTF_TEMP_SHIFT;
1302
1303         if (temp & 0x200)
1304                 actual_temp = 255;
1305         else
1306                 actual_temp = temp & 0x1ff;
1307
1308         actual_temp = (actual_temp * 1000);
1309
1310         return actual_temp;
1311 }
1312
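/*
 * MC "io debug" register tables.  Each row is an {index, value} pair;
 * the pairs are written out through the MC_SEQ_IO_DEBUG_INDEX/DATA
 * registers when the memory-controller microcode is loaded (see
 * si_mc_load_microcode() later in this file).
 */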
1313 #define TAHITI_IO_MC_REGS_SIZE 36
1314
1315 static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1316         {0x0000006f, 0x03044000},
1317         {0x00000070, 0x0480c018},
1318         {0x00000071, 0x00000040},
1319         {0x00000072, 0x01000000},
1320         {0x00000074, 0x000000ff},
1321         {0x00000075, 0x00143400},
1322         {0x00000076, 0x08ec0800},
1323         {0x00000077, 0x040000cc},
1324         {0x00000079, 0x00000000},
1325         {0x0000007a, 0x21000409},
1326         {0x0000007c, 0x00000000},
1327         {0x0000007d, 0xe8000000},
1328         {0x0000007e, 0x044408a8},
1329         {0x0000007f, 0x00000003},
1330         {0x00000080, 0x00000000},
1331         {0x00000081, 0x01000000},
1332         {0x00000082, 0x02000000},
1333         {0x00000083, 0x00000000},
1334         {0x00000084, 0xe3f3e4f4},
1335         {0x00000085, 0x00052024},
1336         {0x00000087, 0x00000000},
1337         {0x00000088, 0x66036603},
1338         {0x00000089, 0x01000000},
1339         {0x0000008b, 0x1c0a0000},
1340         {0x0000008c, 0xff010000},
1341         {0x0000008e, 0xffffefff},
1342         {0x0000008f, 0xfff3efff},
1343         {0x00000090, 0xfff3efbf},
1344         {0x00000094, 0x00101101},
1345         {0x00000095, 0x00000fff},
1346         {0x00000096, 0x00116fff},
1347         {0x00000097, 0x60010000},
1348         {0x00000098, 0x10010000},
1349         {0x00000099, 0x00006000},
1350         {0x0000009a, 0x00001000},
1351         {0x0000009f, 0x00a77400}
1352 };
1353
1354 static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1355         {0x0000006f, 0x03044000},
1356         {0x00000070, 0x0480c018},
1357         {0x00000071, 0x00000040},
1358         {0x00000072, 0x01000000},
1359         {0x00000074, 0x000000ff},
1360         {0x00000075, 0x00143400},
1361         {0x00000076, 0x08ec0800},
1362         {0x00000077, 0x040000cc},
1363         {0x00000079, 0x00000000},
1364         {0x0000007a, 0x21000409},
1365         {0x0000007c, 0x00000000},
1366         {0x0000007d, 0xe8000000},
1367         {0x0000007e, 0x044408a8},
1368         {0x0000007f, 0x00000003},
1369         {0x00000080, 0x00000000},
1370         {0x00000081, 0x01000000},
1371         {0x00000082, 0x02000000},
1372         {0x00000083, 0x00000000},
1373         {0x00000084, 0xe3f3e4f4},
1374         {0x00000085, 0x00052024},
1375         {0x00000087, 0x00000000},
1376         {0x00000088, 0x66036603},
1377         {0x00000089, 0x01000000},
1378         {0x0000008b, 0x1c0a0000},
1379         {0x0000008c, 0xff010000},
1380         {0x0000008e, 0xffffefff},
1381         {0x0000008f, 0xfff3efff},
1382         {0x00000090, 0xfff3efbf},
1383         {0x00000094, 0x00101101},
1384         {0x00000095, 0x00000fff},
1385         {0x00000096, 0x00116fff},
1386         {0x00000097, 0x60010000},
1387         {0x00000098, 0x10010000},
1388         {0x00000099, 0x00006000},
1389         {0x0000009a, 0x00001000},
1390         {0x0000009f, 0x00a47400}
1391 };
1392
1393 static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1394         {0x0000006f, 0x03044000},
1395         {0x00000070, 0x0480c018},
1396         {0x00000071, 0x00000040},
1397         {0x00000072, 0x01000000},
1398         {0x00000074, 0x000000ff},
1399         {0x00000075, 0x00143400},
1400         {0x00000076, 0x08ec0800},
1401         {0x00000077, 0x040000cc},
1402         {0x00000079, 0x00000000},
1403         {0x0000007a, 0x21000409},
1404         {0x0000007c, 0x00000000},
1405         {0x0000007d, 0xe8000000},
1406         {0x0000007e, 0x044408a8},
1407         {0x0000007f, 0x00000003},
1408         {0x00000080, 0x00000000},
1409         {0x00000081, 0x01000000},
1410         {0x00000082, 0x02000000},
1411         {0x00000083, 0x00000000},
1412         {0x00000084, 0xe3f3e4f4},
1413         {0x00000085, 0x00052024},
1414         {0x00000087, 0x00000000},
1415         {0x00000088, 0x66036603},
1416         {0x00000089, 0x01000000},
1417         {0x0000008b, 0x1c0a0000},
1418         {0x0000008c, 0xff010000},
1419         {0x0000008e, 0xffffefff},
1420         {0x0000008f, 0xfff3efff},
1421         {0x00000090, 0xfff3efbf},
1422         {0x00000094, 0x00101101},
1423         {0x00000095, 0x00000fff},
1424         {0x00000096, 0x00116fff},
1425         {0x00000097, 0x60010000},
1426         {0x00000098, 0x10010000},
1427         {0x00000099, 0x00006000},
1428         {0x0000009a, 0x00001000},
1429         {0x0000009f, 0x00a37400}
1430 };
1431
1432 static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1433         {0x0000006f, 0x03044000},
1434         {0x00000070, 0x0480c018},
1435         {0x00000071, 0x00000040},
1436         {0x00000072, 0x01000000},
1437         {0x00000074, 0x000000ff},
1438         {0x00000075, 0x00143400},
1439         {0x00000076, 0x08ec0800},
1440         {0x00000077, 0x040000cc},
1441         {0x00000079, 0x00000000},
1442         {0x0000007a, 0x21000409},
1443         {0x0000007c, 0x00000000},
1444         {0x0000007d, 0xe8000000},
1445         {0x0000007e, 0x044408a8},
1446         {0x0000007f, 0x00000003},
1447         {0x00000080, 0x00000000},
1448         {0x00000081, 0x01000000},
1449         {0x00000082, 0x02000000},
1450         {0x00000083, 0x00000000},
1451         {0x00000084, 0xe3f3e4f4},
1452         {0x00000085, 0x00052024},
1453         {0x00000087, 0x00000000},
1454         {0x00000088, 0x66036603},
1455         {0x00000089, 0x01000000},
1456         {0x0000008b, 0x1c0a0000},
1457         {0x0000008c, 0xff010000},
1458         {0x0000008e, 0xffffefff},
1459         {0x0000008f, 0xfff3efff},
1460         {0x00000090, 0xfff3efbf},
1461         {0x00000094, 0x00101101},
1462         {0x00000095, 0x00000fff},
1463         {0x00000096, 0x00116fff},
1464         {0x00000097, 0x60010000},
1465         {0x00000098, 0x10010000},
1466         {0x00000099, 0x00006000},
1467         {0x0000009a, 0x00001000},
1468         {0x0000009f, 0x00a17730}
1469 };
1470
1471 static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1472         {0x0000006f, 0x03044000},
1473         {0x00000070, 0x0480c018},
1474         {0x00000071, 0x00000040},
1475         {0x00000072, 0x01000000},
1476         {0x00000074, 0x000000ff},
1477         {0x00000075, 0x00143400},
1478         {0x00000076, 0x08ec0800},
1479         {0x00000077, 0x040000cc},
1480         {0x00000079, 0x00000000},
1481         {0x0000007a, 0x21000409},
1482         {0x0000007c, 0x00000000},
1483         {0x0000007d, 0xe8000000},
1484         {0x0000007e, 0x044408a8},
1485         {0x0000007f, 0x00000003},
1486         {0x00000080, 0x00000000},
1487         {0x00000081, 0x01000000},
1488         {0x00000082, 0x02000000},
1489         {0x00000083, 0x00000000},
1490         {0x00000084, 0xe3f3e4f4},
1491         {0x00000085, 0x00052024},
1492         {0x00000087, 0x00000000},
1493         {0x00000088, 0x66036603},
1494         {0x00000089, 0x01000000},
1495         {0x0000008b, 0x1c0a0000},
1496         {0x0000008c, 0xff010000},
1497         {0x0000008e, 0xffffefff},
1498         {0x0000008f, 0xfff3efff},
1499         {0x00000090, 0xfff3efbf},
1500         {0x00000094, 0x00101101},
1501         {0x00000095, 0x00000fff},
1502         {0x00000096, 0x00116fff},
1503         {0x00000097, 0x60010000},
1504         {0x00000098, 0x10010000},
1505         {0x00000099, 0x00006000},
1506         {0x0000009a, 0x00001000},
1507         {0x0000009f, 0x00a07730}
1508 };
1509
1510 /* ucode loading */
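/*
 * Rough sequence implemented by si_mc_load_microcode() below: if the MC
 * sequencer is not already running, reset it and make it writable, program
 * the per-ASIC IO debug register pairs, stream in the ucode words, bring the
 * sequencer back up, and poll MC_SEQ_TRAIN_WAKEUP_CNTL until TRAIN_DONE_D0
 * and TRAIN_DONE_D1 are both reported (or the usec timeout expires).
 */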
1511 int si_mc_load_microcode(struct radeon_device *rdev)
1512 {
1513         const __be32 *fw_data = NULL;
1514         const __le32 *new_fw_data = NULL;
1515         u32 running, blackout = 0;
1516         u32 *io_mc_regs = NULL;
1517         const __le32 *new_io_mc_regs = NULL;
1518         int i, regs_size, ucode_size;
1519
1520         if (!rdev->mc_fw)
1521                 return -EINVAL;
1522
1523         if (rdev->new_fw) {
1524                 const struct mc_firmware_header_v1_0 *hdr =
1525                         (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1526
1527                 radeon_ucode_print_mc_hdr(&hdr->header);
1528                 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1529                 new_io_mc_regs = (const __le32 *)
1530                         (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1531                 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1532                 new_fw_data = (const __le32 *)
1533                         (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1534         } else {
1535                 ucode_size = rdev->mc_fw->size / 4;
1536
1537                 switch (rdev->family) {
1538                 case CHIP_TAHITI:
1539                         io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1540                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1541                         break;
1542                 case CHIP_PITCAIRN:
1543                         io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1544                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1545                         break;
1546                 case CHIP_VERDE:
1547                 default:
1548                         io_mc_regs = (u32 *)&verde_io_mc_regs;
1549                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1550                         break;
1551                 case CHIP_OLAND:
1552                         io_mc_regs = (u32 *)&oland_io_mc_regs;
1553                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1554                         break;
1555                 case CHIP_HAINAN:
1556                         io_mc_regs = (u32 *)&hainan_io_mc_regs;
1557                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1558                         break;
1559                 }
1560                 fw_data = (const __be32 *)rdev->mc_fw->data;
1561         }
1562
1563         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1564
1565         if (running == 0) {
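                /* Note: running is known to be zero in this branch, so the
                 * blackout save below (and the matching restore at the end
                 * of the branch) never actually executes.
                 */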
1566                 if (running) {
1567                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1568                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1569                 }
1570
1571                 /* reset the engine and set to writable */
1572                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1573                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1574
1575                 /* load mc io regs */
1576                 for (i = 0; i < regs_size; i++) {
1577                         if (rdev->new_fw) {
1578                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1579                                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1580                         } else {
1581                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1582                                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1583                         }
1584                 }
1585                 /* load the MC ucode */
1586                 for (i = 0; i < ucode_size; i++) {
1587                         if (rdev->new_fw)
1588                                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1589                         else
1590                                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1591                 }
1592
1593                 /* put the engine back into the active state */
1594                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1595                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1596                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1597
1598                 /* wait for training to complete */
1599                 for (i = 0; i < rdev->usec_timeout; i++) {
1600                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1601                                 break;
1602                         udelay(1);
1603                 }
1604                 for (i = 0; i < rdev->usec_timeout; i++) {
1605                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1606                                 break;
1607                         udelay(1);
1608                 }
1609
1610                 if (running)
1611                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1612         }
1613
1614         return 0;
1615 }
1616
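/*
 * si_init_microcode() below fetches the CP (pfp/me/ce), RLC, MC and SMC
 * firmware images.  For each block it first tries the new lower-case image
 * names (validated via radeon_ucode_validate()) and falls back to the legacy
 * upper-case images with hard-coded size checks.  Mixing new and legacy
 * images is rejected; a missing SMC image is tolerated and err is cleared,
 * presumably because it is only needed for DPM.
 */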
1617 static int si_init_microcode(struct radeon_device *rdev)
1618 {
1619         const char *chip_name;
1620         const char *new_chip_name;
1621         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1622         size_t smc_req_size, mc2_req_size;
1623         char fw_name[30];
1624         int err;
1625         int new_fw = 0;
1626
1627         DRM_DEBUG("\n");
1628
1629         switch (rdev->family) {
1630         case CHIP_TAHITI:
1631                 chip_name = "TAHITI";
1632                 new_chip_name = "tahiti";
1633                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1634                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1635                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1636                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1637                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1638                 mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1639                 smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1640                 break;
1641         case CHIP_PITCAIRN:
1642                 chip_name = "PITCAIRN";
1643                 new_chip_name = "pitcairn";
1644                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1645                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1646                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1647                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1648                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1649                 mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1650                 smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1651                 break;
1652         case CHIP_VERDE:
1653                 chip_name = "VERDE";
1654                 new_chip_name = "verde";
1655                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1656                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1657                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1658                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1659                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1660                 mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1661                 smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1662                 break;
1663         case CHIP_OLAND:
1664                 chip_name = "OLAND";
1665                 new_chip_name = "oland";
1666                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1667                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1668                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1669                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1670                 mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1671                 smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1672                 break;
1673         case CHIP_HAINAN:
1674                 chip_name = "HAINAN";
1675                 new_chip_name = "hainan";
1676                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1677                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1678                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1679                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1680                 mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1681                 smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1682                 break;
1683         default: BUG();
1684         }
1685
1686         DRM_INFO("Loading %s Microcode\n", new_chip_name);
1687
1688         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1689         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1690         if (err) {
1691                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1692                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1693                 if (err)
1694                         goto out;
1695                 if (rdev->pfp_fw->size != pfp_req_size) {
1696                         printk(KERN_ERR
1697                                "si_cp: Bogus length %zu in firmware \"%s\"\n",
1698                                rdev->pfp_fw->size, fw_name);
1699                         err = -EINVAL;
1700                         goto out;
1701                 }
1702         } else {
1703                 err = radeon_ucode_validate(rdev->pfp_fw);
1704                 if (err) {
1705                         printk(KERN_ERR
1706                                "si_cp: validation failed for firmware \"%s\"\n",
1707                                fw_name);
1708                         goto out;
1709                 } else {
1710                         new_fw++;
1711                 }
1712         }
1713
1714         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
1715         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1716         if (err) {
1717                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1718                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1719                 if (err)
1720                         goto out;
1721                 if (rdev->me_fw->size != me_req_size) {
1722                         printk(KERN_ERR
1723                                "si_cp: Bogus length %zu in firmware \"%s\"\n",
1724                                rdev->me_fw->size, fw_name);
1725                         err = -EINVAL;
1726                 }
1727         } else {
1728                 err = radeon_ucode_validate(rdev->me_fw);
1729                 if (err) {
1730                         printk(KERN_ERR
1731                                "si_cp: validation failed for firmware \"%s\"\n",
1732                                fw_name);
1733                         goto out;
1734                 } else {
1735                         new_fw++;
1736                 }
1737         }
1738
1739         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
1740         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1741         if (err) {
1742                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1743                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1744                 if (err)
1745                         goto out;
1746                 if (rdev->ce_fw->size != ce_req_size) {
1747                         printk(KERN_ERR
1748                                "si_cp: Bogus length %zu in firmware \"%s\"\n",
1749                                rdev->ce_fw->size, fw_name);
1750                         err = -EINVAL;
1751                 }
1752         } else {
1753                 err = radeon_ucode_validate(rdev->ce_fw);
1754                 if (err) {
1755                         printk(KERN_ERR
1756                                "si_cp: validation failed for firmware \"%s\"\n",
1757                                fw_name);
1758                         goto out;
1759                 } else {
1760                         new_fw++;
1761                 }
1762         }
1763
1764         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
1765         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1766         if (err) {
1767                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1768                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1769                 if (err)
1770                         goto out;
1771                 if (rdev->rlc_fw->size != rlc_req_size) {
1772                         printk(KERN_ERR
1773                                "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1774                                rdev->rlc_fw->size, fw_name);
1775                         err = -EINVAL;
1776                 }
1777         } else {
1778                 err = radeon_ucode_validate(rdev->rlc_fw);
1779                 if (err) {
1780                         printk(KERN_ERR
1781                                "si_cp: validation failed for firmware \"%s\"\n",
1782                                fw_name);
1783                         goto out;
1784                 } else {
1785                         new_fw++;
1786                 }
1787         }
1788
1789         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
1790         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1791         if (err) {
1792                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1793                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1794                 if (err) {
1795                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1796                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1797                         if (err)
1798                                 goto out;
1799                 }
1800                 if ((rdev->mc_fw->size != mc_req_size) &&
1801                     (rdev->mc_fw->size != mc2_req_size)) {
1802                         printk(KERN_ERR
1803                                "si_mc: Bogus length %zu in firmware \"%s\"\n",
1804                                rdev->mc_fw->size, fw_name);
1805                         err = -EINVAL;
1806                 }
1807                 DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1808         } else {
1809                 err = radeon_ucode_validate(rdev->mc_fw);
1810                 if (err) {
1811                         printk(KERN_ERR
1812                                "si_cp: validation failed for firmware \"%s\"\n",
1813                                fw_name);
1814                         goto out;
1815                 } else {
1816                         new_fw++;
1817                 }
1818         }
1819
1820         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
1821         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1822         if (err) {
1823                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1824                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1825                 if (err) {
1826                         printk(KERN_ERR
1827                                "si_smc: error loading firmware \"%s\"\n",
1828                                fw_name);
1829                         release_firmware(rdev->smc_fw);
1830                         rdev->smc_fw = NULL;
1831                         err = 0;
1832                 } else if (rdev->smc_fw->size != smc_req_size) {
1833                         printk(KERN_ERR
1834                                "si_smc: Bogus length %zu in firmware \"%s\"\n",
1835                                rdev->smc_fw->size, fw_name);
1836                         err = -EINVAL;
1837                 }
1838         } else {
1839                 err = radeon_ucode_validate(rdev->smc_fw);
1840                 if (err) {
1841                         printk(KERN_ERR
1842                                "si_cp: validation failed for firmware \"%s\"\n",
1843                                fw_name);
1844                         goto out;
1845                 } else {
1846                         new_fw++;
1847                 }
1848         }
1849
1850         if (new_fw == 0) {
1851                 rdev->new_fw = false;
1852         } else if (new_fw < 6) {
1853                 printk(KERN_ERR "si_fw: mixing new and old firmware!\n");
1854                 err = -EINVAL;
1855         } else {
1856                 rdev->new_fw = true;
1857         }
1858 out:
1859         if (err) {
1860                 if (err != -EINVAL)
1861                         printk(KERN_ERR
1862                                "si_cp: Failed to load firmware \"%s\"\n",
1863                                fw_name);
1864                 release_firmware(rdev->pfp_fw);
1865                 rdev->pfp_fw = NULL;
1866                 release_firmware(rdev->me_fw);
1867                 rdev->me_fw = NULL;
1868                 release_firmware(rdev->ce_fw);
1869                 rdev->ce_fw = NULL;
1870                 release_firmware(rdev->rlc_fw);
1871                 rdev->rlc_fw = NULL;
1872                 release_firmware(rdev->mc_fw);
1873                 rdev->mc_fw = NULL;
1874                 release_firmware(rdev->smc_fw);
1875                 rdev->smc_fw = NULL;
1876         }
1877         return err;
1878 }
1879
1880 /* watermark setup */
1881 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1882                                    struct radeon_crtc *radeon_crtc,
1883                                    struct drm_display_mode *mode,
1884                                    struct drm_display_mode *other_mode)
1885 {
1886         u32 tmp, buffer_alloc, i;
1887         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1888         /*
1889          * Line Buffer Setup
1890          * There are 3 line buffers, each one shared by 2 display controllers.
1891          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1892          * the display controllers.  The partitioning is done via one of four
1893          * preset allocations specified in bits 21:20 (only the two used below are listed):
1894          *  0 - half lb
1895          *  2 - whole lb, other crtc must be disabled
1896          */
1897         /* this can get tricky if we have two large displays on a paired group
1898          * of crtcs.  Ideally for multiple large displays we'd assign them to
1899          * non-linked crtcs for maximum line buffer allocation.
1900          */
1901         if (radeon_crtc->base.enabled && mode) {
1902                 if (other_mode) {
1903                         tmp = 0; /* 1/2 */
1904                         buffer_alloc = 1;
1905                 } else {
1906                         tmp = 2; /* whole */
1907                         buffer_alloc = 2;
1908                 }
1909         } else {
1910                 tmp = 0;
1911                 buffer_alloc = 0;
1912         }
1913
1914         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1915                DC_LB_MEMORY_CONFIG(tmp));
1916
1917         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1918                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1919         for (i = 0; i < rdev->usec_timeout; i++) {
1920                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1921                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
1922                         break;
1923                 udelay(1);
1924         }
1925
1926         if (radeon_crtc->base.enabled && mode) {
1927                 switch (tmp) {
1928                 case 0:
1929                 default:
1930                         return 4096 * 2;
1931                 case 2:
1932                         return 8192 * 2;
1933                 }
1934         }
1935
1936         /* controller not enabled, so no lb used */
1937         return 0;
1938 }
1939
1940 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1941 {
1942         u32 tmp = RREG32(MC_SHARED_CHMAP);
1943
1944         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1945         case 0:
1946         default:
1947                 return 1;
1948         case 1:
1949                 return 2;
1950         case 2:
1951                 return 4;
1952         case 3:
1953                 return 8;
1954         case 4:
1955                 return 3;
1956         case 5:
1957                 return 6;
1958         case 6:
1959                 return 10;
1960         case 7:
1961                 return 12;
1962         case 8:
1963                 return 16;
1964         }
1965 }
1966
1967 struct dce6_wm_params {
1968         u32 dram_channels; /* number of dram channels */
1969         u32 yclk;          /* bandwidth per dram data pin in kHz */
1970         u32 sclk;          /* engine clock in kHz */
1971         u32 disp_clk;      /* display clock in kHz */
1972         u32 src_width;     /* viewport width */
1973         u32 active_time;   /* active display time in ns */
1974         u32 blank_time;    /* blank time in ns */
1975         bool interlaced;    /* mode is interlaced */
1976         fixed20_12 vsc;    /* vertical scale ratio */
1977         u32 num_heads;     /* number of active crtcs */
1978         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
1979         u32 lb_size;       /* line buffer allocated to pipe */
1980         u32 vtaps;         /* vertical scaler taps */
1981 };
1982
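/* A rough restatement of the fixed-point math in dce6_dram_bandwidth():
 *   bandwidth = (yclk / 1000) * (dram_channels * 4) * 0.7
 * i.e. raw DRAM bandwidth derated by an assumed 70% efficiency.  For example,
 * yclk = 1000000 (kHz) with 2 channels gives 1000 * 8 * 0.7 = 5600.
 */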
1983 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1984 {
1985         /* Calculate raw DRAM Bandwidth */
1986         fixed20_12 dram_efficiency; /* 0.7 */
1987         fixed20_12 yclk, dram_channels, bandwidth;
1988         fixed20_12 a;
1989
1990         a.full = dfixed_const(1000);
1991         yclk.full = dfixed_const(wm->yclk);
1992         yclk.full = dfixed_div(yclk, a);
1993         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1994         a.full = dfixed_const(10);
1995         dram_efficiency.full = dfixed_const(7);
1996         dram_efficiency.full = dfixed_div(dram_efficiency, a);
1997         bandwidth.full = dfixed_mul(dram_channels, yclk);
1998         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1999
2000         return dfixed_trunc(bandwidth);
2001 }
2002
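/* Same calculation as above, but derated to the assumed worst-case 0.3 share
 * of DRAM bandwidth available to the display controller:
 *   bandwidth = (yclk / 1000) * (dram_channels * 4) * 0.3
 */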
2003 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2004 {
2005         /* Calculate DRAM Bandwidth and the part allocated to display. */
2006         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2007         fixed20_12 yclk, dram_channels, bandwidth;
2008         fixed20_12 a;
2009
2010         a.full = dfixed_const(1000);
2011         yclk.full = dfixed_const(wm->yclk);
2012         yclk.full = dfixed_div(yclk, a);
2013         dram_channels.full = dfixed_const(wm->dram_channels * 4);
2014         a.full = dfixed_const(10);
2015         disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
2016         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2017         bandwidth.full = dfixed_mul(dram_channels, yclk);
2018         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2019
2020         return dfixed_trunc(bandwidth);
2021 }
2022
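/* Display data return bandwidth, assuming 32 bytes returned per sclk cycle
 * at 80% efficiency:
 *   bandwidth = 32 * (sclk / 1000) * 0.8
 */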
2023 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2024 {
2025         /* Calculate the display Data return Bandwidth */
2026         fixed20_12 return_efficiency; /* 0.8 */
2027         fixed20_12 sclk, bandwidth;
2028         fixed20_12 a;
2029
2030         a.full = dfixed_const(1000);
2031         sclk.full = dfixed_const(wm->sclk);
2032         sclk.full = dfixed_div(sclk, a);
2033         a.full = dfixed_const(10);
2034         return_efficiency.full = dfixed_const(8);
2035         return_efficiency.full = dfixed_div(return_efficiency, a);
2036         a.full = dfixed_const(32);
2037         bandwidth.full = dfixed_mul(a, sclk);
2038         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2039
2040         return dfixed_trunc(bandwidth);
2041 }
2042
2043 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2044 {
2045         return 32;
2046 }
2047
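/* DMIF request bandwidth: the smaller of what the display clock and the
 * engine clock can issue, derated by an 80% request efficiency:
 *   bandwidth = min(disp_clk/1000 * bytes_per_request/2,
 *                   sclk/1000     * bytes_per_request) * 0.8
 * with bytes_per_request = 32 from dce6_get_dmif_bytes_per_request().
 */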
2048 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2049 {
2050         /* Calculate the DMIF Request Bandwidth */
2051         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2052         fixed20_12 disp_clk, sclk, bandwidth;
2053         fixed20_12 a, b1, b2;
2054         u32 min_bandwidth;
2055
2056         a.full = dfixed_const(1000);
2057         disp_clk.full = dfixed_const(wm->disp_clk);
2058         disp_clk.full = dfixed_div(disp_clk, a);
2059         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2060         b1.full = dfixed_mul(a, disp_clk);
2061
2062         a.full = dfixed_const(1000);
2063         sclk.full = dfixed_const(wm->sclk);
2064         sclk.full = dfixed_div(sclk, a);
2065         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2066         b2.full = dfixed_mul(a, sclk);
2067
2068         a.full = dfixed_const(10);
2069         disp_clk_request_efficiency.full = dfixed_const(8);
2070         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2071
2072         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2073
2074         a.full = dfixed_const(min_bandwidth);
2075         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2076
2077         return dfixed_trunc(bandwidth);
2078 }
2079
2080 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2081 {
2082         /* Calculate the available bandwidth. Display can use this temporarily but not on average. */
2083         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2084         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2085         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2086
2087         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2088 }
2089
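/* Average bandwidth demanded by the mode itself, roughly:
 *   bandwidth = src_width * bytes_per_pixel * vsc / (line_time / 1000)
 * where line_time = active_time + blank_time is in ns, so the divide by
 * 1000 expresses it in microseconds.
 */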
2090 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2091 {
2092         /* Calculate the display mode Average Bandwidth
2093          * DisplayMode should contain the source and destination dimensions,
2094          * timing, etc.
2095          */
2096         fixed20_12 bpp;
2097         fixed20_12 line_time;
2098         fixed20_12 src_width;
2099         fixed20_12 bandwidth;
2100         fixed20_12 a;
2101
2102         a.full = dfixed_const(1000);
2103         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2104         line_time.full = dfixed_div(line_time, a);
2105         bpp.full = dfixed_const(wm->bytes_per_pixel);
2106         src_width.full = dfixed_const(wm->src_width);
2107         bandwidth.full = dfixed_mul(src_width, bpp);
2108         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2109         bandwidth.full = dfixed_div(bandwidth, line_time);
2110
2111         return dfixed_trunc(bandwidth);
2112 }
2113
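/* Worst-case latency (in ns) that the line buffer has to hide: memory
 * controller latency plus the time for the other heads' chunk and cursor
 * requests to return plus display pipe latency.  If filling one line of the
 * line buffer takes longer than the active display time, the difference is
 * added on top.
 */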
2114 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2115 {
2116         /* First calculate the latency in ns */
2117         u32 mc_latency = 2000; /* 2000 ns. */
2118         u32 available_bandwidth = dce6_available_bandwidth(wm);
2119         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2120         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2121         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2122         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2123                 (wm->num_heads * cursor_line_pair_return_time);
2124         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2125         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2126         u32 tmp, dmif_size = 12288;
2127         fixed20_12 a, b, c;
2128
2129         if (wm->num_heads == 0)
2130                 return 0;
2131
2132         a.full = dfixed_const(2);
2133         b.full = dfixed_const(1);
2134         if ((wm->vsc.full > a.full) ||
2135             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2136             (wm->vtaps >= 5) ||
2137             ((wm->vsc.full >= a.full) && wm->interlaced))
2138                 max_src_lines_per_dst_line = 4;
2139         else
2140                 max_src_lines_per_dst_line = 2;
2141
2142         a.full = dfixed_const(available_bandwidth);
2143         b.full = dfixed_const(wm->num_heads);
2144         a.full = dfixed_div(a, b);
2145
2146         b.full = dfixed_const(mc_latency + 512);
2147         c.full = dfixed_const(wm->disp_clk);
2148         b.full = dfixed_div(b, c);
2149
2150         c.full = dfixed_const(dmif_size);
2151         b.full = dfixed_div(c, b);
2152
2153         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
2154
2155         b.full = dfixed_const(1000);
2156         c.full = dfixed_const(wm->disp_clk);
2157         b.full = dfixed_div(c, b);
2158         c.full = dfixed_const(wm->bytes_per_pixel);
2159         b.full = dfixed_mul(b, c);
2160
2161         lb_fill_bw = min(tmp, dfixed_trunc(b));
2162
2163         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2164         b.full = dfixed_const(1000);
2165         c.full = dfixed_const(lb_fill_bw);
2166         b.full = dfixed_div(c, b);
2167         a.full = dfixed_div(a, b);
2168         line_fill_time = dfixed_trunc(a);
2169
2170         if (line_fill_time < wm->active_time)
2171                 return latency;
2172         else
2173                 return latency + (line_fill_time - wm->active_time);
2174
2175 }
2176
2177 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2178 {
2179         if (dce6_average_bandwidth(wm) <=
2180             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2181                 return true;
2182         else
2183                 return false;
2184 }
2185
2186 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2187 {
2188         if (dce6_average_bandwidth(wm) <=
2189             (dce6_available_bandwidth(wm) / wm->num_heads))
2190                 return true;
2191         else
2192                 return false;
2193 }
2194
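/* Latency hiding check: the latency watermark must fit within the time that
 * the latency-tolerant line buffer lines (1 or 2, depending on scaling and
 * tap count) plus the blanking period can cover.
 */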
2195 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2196 {
2197         u32 lb_partitions = wm->lb_size / wm->src_width;
2198         u32 line_time = wm->active_time + wm->blank_time;
2199         u32 latency_tolerant_lines;
2200         u32 latency_hiding;
2201         fixed20_12 a;
2202
2203         a.full = dfixed_const(1);
2204         if (wm->vsc.full > a.full)
2205                 latency_tolerant_lines = 1;
2206         else {
2207                 if (lb_partitions <= (wm->vtaps + 1))
2208                         latency_tolerant_lines = 1;
2209                 else
2210                         latency_tolerant_lines = 2;
2211         }
2212
2213         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2214
2215         if (dce6_latency_watermark(wm) <= latency_hiding)
2216                 return true;
2217         else
2218                 return false;
2219 }
2220
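/* Program the per-CRTC display watermarks.  Two parameter sets are built:
 * wm_high for the highest DPM clocks and wm_low for the lowest; the resulting
 * latency watermarks are written as watermarks A and B and the priority marks
 * are derived from them.  If any bandwidth or latency-hiding check fails (or
 * disp_priority == 2), display requests are forced to always-on priority.
 */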
2221 static void dce6_program_watermarks(struct radeon_device *rdev,
2222                                          struct radeon_crtc *radeon_crtc,
2223                                          u32 lb_size, u32 num_heads)
2224 {
2225         struct drm_display_mode *mode = &radeon_crtc->base.mode;
2226         struct dce6_wm_params wm_low, wm_high;
2227         u32 dram_channels;
2228         u32 pixel_period;
2229         u32 line_time = 0;
2230         u32 latency_watermark_a = 0, latency_watermark_b = 0;
2231         u32 priority_a_mark = 0, priority_b_mark = 0;
2232         u32 priority_a_cnt = PRIORITY_OFF;
2233         u32 priority_b_cnt = PRIORITY_OFF;
2234         u32 tmp, arb_control3;
2235         fixed20_12 a, b, c;
2236
2237         if (radeon_crtc->base.enabled && num_heads && mode) {
2238                 pixel_period = 1000000 / (u32)mode->clock;
2239                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
2240                 priority_a_cnt = 0;
2241                 priority_b_cnt = 0;
2242
2243                 if (rdev->family == CHIP_ARUBA)
2244                         dram_channels = evergreen_get_number_of_dram_channels(rdev);
2245                 else
2246                         dram_channels = si_get_number_of_dram_channels(rdev);
2247
2248                 /* watermark for high clocks */
2249                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2250                         wm_high.yclk =
2251                                 radeon_dpm_get_mclk(rdev, false) * 10;
2252                         wm_high.sclk =
2253                                 radeon_dpm_get_sclk(rdev, false) * 10;
2254                 } else {
2255                         wm_high.yclk = rdev->pm.current_mclk * 10;
2256                         wm_high.sclk = rdev->pm.current_sclk * 10;
2257                 }
2258
2259                 wm_high.disp_clk = mode->clock;
2260                 wm_high.src_width = mode->crtc_hdisplay;
2261                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
2262                 wm_high.blank_time = line_time - wm_high.active_time;
2263                 wm_high.interlaced = false;
2264                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2265                         wm_high.interlaced = true;
2266                 wm_high.vsc = radeon_crtc->vsc;
2267                 wm_high.vtaps = 1;
2268                 if (radeon_crtc->rmx_type != RMX_OFF)
2269                         wm_high.vtaps = 2;
2270                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2271                 wm_high.lb_size = lb_size;
2272                 wm_high.dram_channels = dram_channels;
2273                 wm_high.num_heads = num_heads;
2274
2275                 /* watermark for low clocks */
2276                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2277                         wm_low.yclk =
2278                                 radeon_dpm_get_mclk(rdev, true) * 10;
2279                         wm_low.sclk =
2280                                 radeon_dpm_get_sclk(rdev, true) * 10;
2281                 } else {
2282                         wm_low.yclk = rdev->pm.current_mclk * 10;
2283                         wm_low.sclk = rdev->pm.current_sclk * 10;
2284                 }
2285
2286                 wm_low.disp_clk = mode->clock;
2287                 wm_low.src_width = mode->crtc_hdisplay;
2288                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
2289                 wm_low.blank_time = line_time - wm_low.active_time;
2290                 wm_low.interlaced = false;
2291                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2292                         wm_low.interlaced = true;
2293                 wm_low.vsc = radeon_crtc->vsc;
2294                 wm_low.vtaps = 1;
2295                 if (radeon_crtc->rmx_type != RMX_OFF)
2296                         wm_low.vtaps = 2;
2297                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2298                 wm_low.lb_size = lb_size;
2299                 wm_low.dram_channels = dram_channels;
2300                 wm_low.num_heads = num_heads;
2301
2302                 /* set for high clocks */
2303                 latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2304                 /* set for low clocks */
2305                 latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2306
2307                 /* possibly force display priority to high */
2308                 /* should really do this at mode validation time... */
2309                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2310                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2311                     !dce6_check_latency_hiding(&wm_high) ||
2312                     (rdev->disp_priority == 2)) {
2313                         DRM_DEBUG_KMS("force priority to high\n");
2314                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2315                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2316                 }
2317                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2318                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2319                     !dce6_check_latency_hiding(&wm_low) ||
2320                     (rdev->disp_priority == 2)) {
2321                         DRM_DEBUG_KMS("force priority to high\n");
2322                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2323                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2324                 }
2325
2326                 a.full = dfixed_const(1000);
2327                 b.full = dfixed_const(mode->clock);
2328                 b.full = dfixed_div(b, a);
2329                 c.full = dfixed_const(latency_watermark_a);
2330                 c.full = dfixed_mul(c, b);
2331                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2332                 c.full = dfixed_div(c, a);
2333                 a.full = dfixed_const(16);
2334                 c.full = dfixed_div(c, a);
2335                 priority_a_mark = dfixed_trunc(c);
2336                 priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2337
2338                 a.full = dfixed_const(1000);
2339                 b.full = dfixed_const(mode->clock);
2340                 b.full = dfixed_div(b, a);
2341                 c.full = dfixed_const(latency_watermark_b);
2342                 c.full = dfixed_mul(c, b);
2343                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2344                 c.full = dfixed_div(c, a);
2345                 a.full = dfixed_const(16);
2346                 c.full = dfixed_div(c, a);
2347                 priority_b_mark = dfixed_trunc(c);
2348                 priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2349         }
2350
2351         /* select wm A */
2352         arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2353         tmp = arb_control3;
2354         tmp &= ~LATENCY_WATERMARK_MASK(3);
2355         tmp |= LATENCY_WATERMARK_MASK(1);
2356         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2357         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2358                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2359                 LATENCY_HIGH_WATERMARK(line_time)));
2360         /* select wm B */
2361         tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2362         tmp &= ~LATENCY_WATERMARK_MASK(3);
2363         tmp |= LATENCY_WATERMARK_MASK(2);
2364         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2365         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2366                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2367                 LATENCY_HIGH_WATERMARK(line_time)));
2368         /* restore original selection */
2369         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2370
2371         /* write the priority marks */
2372         WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2373         WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2374
2375         /* save values for DPM */
2376         radeon_crtc->line_time = line_time;
2377         radeon_crtc->wm_high = latency_watermark_a;
2378         radeon_crtc->wm_low = latency_watermark_b;
2379 }
2380
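/* Walk the CRTCs in pairs (each pair shares one line buffer), let
 * dce6_line_buffer_adjust() split the buffer between the pair's two modes,
 * and reprogram the watermarks of both CRTCs with the resulting allocation.
 */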
2381 void dce6_bandwidth_update(struct radeon_device *rdev)
2382 {
2383         struct drm_display_mode *mode0 = NULL;
2384         struct drm_display_mode *mode1 = NULL;
2385         u32 num_heads = 0, lb_size;
2386         int i;
2387
2388         if (!rdev->mode_info.mode_config_initialized)
2389                 return;
2390
2391         radeon_update_display_priority(rdev);
2392
2393         for (i = 0; i < rdev->num_crtc; i++) {
2394                 if (rdev->mode_info.crtcs[i]->base.enabled)
2395                         num_heads++;
2396         }
2397         for (i = 0; i < rdev->num_crtc; i += 2) {
2398                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2399                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2400                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2401                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2402                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2403                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2404         }
2405 }
2406
2407 /*
2408  * Core functions
2409  */
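/* Build the 32-entry tiling mode table.  split_equal_to_row_size selects the
 * TILE_SPLIT matching the memory row size, and the per-index encodings below
 * are specific to Tahiti/Pitcairn; the remaining SI parts are handled by a
 * separate table later in this function.
 */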
2410 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2411 {
2412         const u32 num_tile_mode_states = 32;
2413         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2414
2415         switch (rdev->config.si.mem_row_size_in_kb) {
2416         case 1:
2417                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2418                 break;
2419         case 2:
2420         default:
2421                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2422                 break;
2423         case 4:
2424                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2425                 break;
2426         }
2427
2428         if ((rdev->family == CHIP_TAHITI) ||
2429             (rdev->family == CHIP_PITCAIRN)) {
2430                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2431                         switch (reg_offset) {
2432                         case 0:  /* non-AA compressed depth or any compressed stencil */
2433                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2434                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2435                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2436                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2437                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2438                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2439                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2440                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2441                                 break;
2442                         case 1:  /* 2xAA/4xAA compressed depth only */
2443                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2444                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2445                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2446                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2447                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2448                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2450                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2451                                 break;
2452                         case 2:  /* 8xAA compressed depth only */
2453                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2454                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2455                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2456                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2457                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2458                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2460                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2461                                 break;
2462                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2463                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2464                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2465                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2466                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2467                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2468                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2470                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2471                                 break;
2472                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2473                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2474                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2475                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2476                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2477                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2478                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2480                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2481                                 break;
2482                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2483                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2484                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2485                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2486                                                  TILE_SPLIT(split_equal_to_row_size) |
2487                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2488                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2490                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2491                                 break;
2492                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2493                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2494                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2495                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2496                                                  TILE_SPLIT(split_equal_to_row_size) |
2497                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2498                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2499                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2500                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2501                                 break;
2502                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2503                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2504                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2505                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2506                                                  TILE_SPLIT(split_equal_to_row_size) |
2507                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2508                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2510                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2511                                 break;
2512                         case 8:  /* 1D and 1D Array Surfaces */
2513                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2514                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2515                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2516                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2517                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2518                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2519                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2520                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2521                                 break;
2522                         case 9:  /* Displayable maps. */
2523                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2524                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2525                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2526                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2527                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2528                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2529                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2530                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2531                                 break;
2532                         case 10:  /* Display 8bpp. */
2533                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2534                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2535                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2536                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2537                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2538                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2540                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2541                                 break;
2542                         case 11:  /* Display 16bpp. */
2543                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2544                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2545                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2546                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2547                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2548                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2549                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2550                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2551                                 break;
2552                         case 12:  /* Display 32bpp. */
2553                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2554                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2555                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2556                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2557                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2558                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2559                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2560                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2561                                 break;
2562                         case 13:  /* Thin. */
2563                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2564                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2565                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2566                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2567                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2568                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2569                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2570                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2571                                 break;
2572                         case 14:  /* Thin 8 bpp. */
2573                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2574                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2575                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2576                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2577                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2578                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2579                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2580                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2581                                 break;
2582                         case 15:  /* Thin 16 bpp. */
2583                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2584                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2585                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2586                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2587                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2588                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2589                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2590                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2591                                 break;
2592                         case 16:  /* Thin 32 bpp. */
2593                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2594                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2595                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2596                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2597                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2598                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2599                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2600                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2601                                 break;
2602                         case 17:  /* Thin 64 bpp. */
2603                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2605                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2606                                                  TILE_SPLIT(split_equal_to_row_size) |
2607                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2608                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2610                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2611                                 break;
2612                         case 21:  /* 8 bpp PRT. */
2613                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2614                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2615                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2616                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2617                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2618                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2619                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2620                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2621                                 break;
2622                         case 22:  /* 16 bpp PRT */
2623                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2624                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2625                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2626                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2627                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2628                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2630                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2631                                 break;
2632                         case 23:  /* 32 bpp PRT */
2633                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2634                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2635                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2636                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2637                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2638                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2640                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2641                                 break;
2642                         case 24:  /* 64 bpp PRT */
2643                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2644                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2645                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2646                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2647                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2648                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2649                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2650                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2651                                 break;
2652                         case 25:  /* 128 bpp PRT */
2653                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2655                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2656                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2657                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2658                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2659                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2660                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2661                                 break;
2662                         default:
2663                                 gb_tile_moden = 0;
2664                                 break;
2665                         }
2666                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2667                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2668                 }
2669         } else if ((rdev->family == CHIP_VERDE) ||
2670                    (rdev->family == CHIP_OLAND) ||
2671                    (rdev->family == CHIP_HAINAN)) {
2672                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2673                         switch (reg_offset) {
2674                         case 0:  /* non-AA compressed depth or any compressed stencil */
2675                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2676                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2677                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2678                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2679                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2680                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2681                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2682                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2683                                 break;
2684                         case 1:  /* 2xAA/4xAA compressed depth only */
2685                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2687                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2688                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2689                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2690                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2692                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2693                                 break;
2694                         case 2:  /* 8xAA compressed depth only */
2695                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2696                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2697                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2698                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2699                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2700                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2701                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2702                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2703                                 break;
2704                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2705                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2706                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2707                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2708                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2709                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2710                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2711                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2712                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2713                                 break;
2714                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2715                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2716                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2717                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2718                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2719                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2720                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2721                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2722                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2723                                 break;
2724                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2725                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2726                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2727                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2728                                                  TILE_SPLIT(split_equal_to_row_size) |
2729                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2730                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2731                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2732                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2733                                 break;
2734                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2735                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2736                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2737                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2738                                                  TILE_SPLIT(split_equal_to_row_size) |
2739                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2740                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2741                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2742                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2743                                 break;
2744                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2745                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2746                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2747                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2748                                                  TILE_SPLIT(split_equal_to_row_size) |
2749                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2750                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2752                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2753                                 break;
2754                         case 8:  /* 1D and 1D Array Surfaces */
2755                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2756                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2757                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2758                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2759                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2760                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2761                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2762                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2763                                 break;
2764                         case 9:  /* Displayable maps. */
2765                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2766                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2767                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2768                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2769                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2770                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2771                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2772                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2773                                 break;
2774                         case 10:  /* Display 8bpp. */
2775                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2776                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2777                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2779                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2780                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2782                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2783                                 break;
2784                         case 11:  /* Display 16bpp. */
2785                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2786                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2787                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2788                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2789                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2790                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2791                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2792                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2793                                 break;
2794                         case 12:  /* Display 32bpp. */
2795                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2796                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2797                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2798                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2799                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2800                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2801                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2802                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2803                                 break;
2804                         case 13:  /* Thin. */
2805                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2806                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2807                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2808                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2809                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2810                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2811                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2812                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2813                                 break;
2814                         case 14:  /* Thin 8 bpp. */
2815                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2816                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2817                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2818                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2819                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2820                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2821                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2822                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2823                                 break;
2824                         case 15:  /* Thin 16 bpp. */
2825                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2826                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2827                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2828                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2829                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2830                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2831                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2832                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2833                                 break;
2834                         case 16:  /* Thin 32 bpp. */
2835                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2836                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2837                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2838                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2839                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2840                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2842                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2843                                 break;
2844                         case 17:  /* Thin 64 bpp. */
2845                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2846                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2847                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2848                                                  TILE_SPLIT(split_equal_to_row_size) |
2849                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2850                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2851                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2852                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2853                                 break;
2854                         case 21:  /* 8 bpp PRT. */
2855                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2856                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2857                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2858                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2859                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2860                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2861                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2862                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2863                                 break;
2864                         case 22:  /* 16 bpp PRT */
2865                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2866                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2867                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2868                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2869                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2870                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2871                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2872                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2873                                 break;
2874                         case 23:  /* 32 bpp PRT */
2875                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2876                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2877                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2878                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2879                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2880                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2882                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2883                                 break;
2884                         case 24:  /* 64 bpp PRT */
2885                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2887                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2888                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2889                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2890                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2891                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2892                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2893                                 break;
2894                         case 25:  /* 128 bpp PRT */
2895                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2896                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2897                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2898                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2899                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2900                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2901                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2902                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2903                                 break;
2904                         default:
2905                                 gb_tile_moden = 0;
2906                                 break;
2907                         }
2908                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2909                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2910                 }
2911         } else
2912                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2913 }
2914
2915 static void si_select_se_sh(struct radeon_device *rdev,
2916                             u32 se_num, u32 sh_num)
2917 {
2918         u32 data = INSTANCE_BROADCAST_WRITES;
2919
2920         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2921                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2922         else if (se_num == 0xffffffff)
2923                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2924         else if (sh_num == 0xffffffff)
2925                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2926         else
2927                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2928         WREG32(GRBM_GFX_INDEX, data);
2929 }
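/*
 * For illustration: passing 0xffffffff for se_num and/or sh_num selects
 * broadcast mode, so a following register write hits every shader engine
 * and/or shader array at once, e.g.
 *
 *   si_select_se_sh(rdev, 0, 1);                    // SE0 / SH1 only
 *   si_select_se_sh(rdev, 0xffffffff, 0xffffffff);  // broadcast to all
 */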
2930
2931 static u32 si_create_bitmask(u32 bit_width)
2932 {
2933         u32 i, mask = 0;
2934
2935         for (i = 0; i < bit_width; i++) {
2936                 mask <<= 1;
2937                 mask |= 1;
2938         }
2939         return mask;
2940 }
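/*
 * Worked example: si_create_bitmask(4) returns 0xf and
 * si_create_bitmask(10) returns 0x3ff, i.e. the lowest bit_width bits set,
 * equivalent to ((1 << bit_width) - 1) for bit_width < 32.
 */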
2941
2942 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2943 {
2944         u32 data, mask;
2945
2946         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2947         if (data & 1)
2948                 data &= INACTIVE_CUS_MASK;
2949         else
2950                 data = 0;
2951         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2952
2953         data >>= INACTIVE_CUS_SHIFT;
2954
2955         mask = si_create_bitmask(cu_per_sh);
2956
2957         return ~data & mask;
2958 }
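/*
 * Worked example (hypothetical values): with cu_per_sh = 5 the mask is 0x1f;
 * if the harvest fuses report CU 2 as inactive (data = 0x04 after shifting),
 * the function returns ~0x04 & 0x1f = 0x1b, i.e. CUs 0, 1, 3 and 4 active.
 */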
2959
2960 static void si_setup_spi(struct radeon_device *rdev,
2961                          u32 se_num, u32 sh_per_se,
2962                          u32 cu_per_sh)
2963 {
2964         int i, j, k;
2965         u32 data, mask, active_cu;
2966
2967         for (i = 0; i < se_num; i++) {
2968                 for (j = 0; j < sh_per_se; j++) {
2969                         si_select_se_sh(rdev, i, j);
2970                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2971                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2972
2973                         mask = 1;
2974                         for (k = 0; k < 16; k++) {
2975                                         mask = 1 << k;  /* test CU bit k */
2976                                 if (active_cu & mask) {
2977                                         data &= ~mask;
2978                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2979                                         break;
2980                                 }
2981                         }
2982                 }
2983         }
2984         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2985 }
2986
2987 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2988                               u32 max_rb_num_per_se,
2989                               u32 sh_per_se)
2990 {
2991         u32 data, mask;
2992
2993         data = RREG32(CC_RB_BACKEND_DISABLE);
2994         if (data & 1)
2995                 data &= BACKEND_DISABLE_MASK;
2996         else
2997                 data = 0;
2998         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2999
3000         data >>= BACKEND_DISABLE_SHIFT;
3001
3002         mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3003
3004         return data & mask;
3005 }
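/*
 * Worked example (hypothetical values): with max_rb_num_per_se = 4 and
 * sh_per_se = 2 the mask is 0x3 (two RBs per shader array); a raw disable
 * field of 0x2 therefore means the second RB of the selected SH is disabled.
 */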
3006
3007 static void si_setup_rb(struct radeon_device *rdev,
3008                         u32 se_num, u32 sh_per_se,
3009                         u32 max_rb_num_per_se)
3010 {
3011         int i, j;
3012         u32 data, mask;
3013         u32 disabled_rbs = 0;
3014         u32 enabled_rbs = 0;
3015
3016         for (i = 0; i < se_num; i++) {
3017                 for (j = 0; j < sh_per_se; j++) {
3018                         si_select_se_sh(rdev, i, j);
3019                         data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3020                         disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
3021                 }
3022         }
3023         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3024
3025         mask = 1;
3026         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3027                 if (!(disabled_rbs & mask))
3028                         enabled_rbs |= mask;
3029                 mask <<= 1;
3030         }
3031
3032         rdev->config.si.backend_enable_mask = enabled_rbs;
3033
3034         for (i = 0; i < se_num; i++) {
3035                 si_select_se_sh(rdev, i, 0xffffffff);
3036                 data = 0;
3037                 for (j = 0; j < sh_per_se; j++) {
3038                         switch (enabled_rbs & 3) {
3039                         case 1:
3040                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3041                                 break;
3042                         case 2:
3043                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3044                                 break;
3045                         case 3:
3046                         default:
3047                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3048                                 break;
3049                         }
3050                         enabled_rbs >>= 2;
3051                 }
3052                 WREG32(PA_SC_RASTER_CONFIG, data);
3053         }
3054         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3055 }
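/*
 * For illustration: each pair of bits in enabled_rbs selects the raster
 * config for one shader array; 0x3 (both RBs usable) picks
 * RASTER_CONFIG_RB_MAP_2, 0x1 picks RASTER_CONFIG_RB_MAP_0 and 0x2 picks
 * RASTER_CONFIG_RB_MAP_3, shifted into PA_SC_RASTER_CONFIG per SE/SH slot.
 */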
3056
3057 static void si_gpu_init(struct radeon_device *rdev)
3058 {
3059         u32 gb_addr_config = 0;
3060         u32 mc_shared_chmap, mc_arb_ramcfg;
3061         u32 sx_debug_1;
3062         u32 hdp_host_path_cntl;
3063         u32 tmp;
3064         int i, j;
3065
3066         switch (rdev->family) {
3067         case CHIP_TAHITI:
3068                 rdev->config.si.max_shader_engines = 2;
3069                 rdev->config.si.max_tile_pipes = 12;
3070                 rdev->config.si.max_cu_per_sh = 8;
3071                 rdev->config.si.max_sh_per_se = 2;
3072                 rdev->config.si.max_backends_per_se = 4;
3073                 rdev->config.si.max_texture_channel_caches = 12;
3074                 rdev->config.si.max_gprs = 256;
3075                 rdev->config.si.max_gs_threads = 32;
3076                 rdev->config.si.max_hw_contexts = 8;
3077
3078                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3079                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3080                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3081                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3082                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3083                 break;
3084         case CHIP_PITCAIRN:
3085                 rdev->config.si.max_shader_engines = 2;
3086                 rdev->config.si.max_tile_pipes = 8;
3087                 rdev->config.si.max_cu_per_sh = 5;
3088                 rdev->config.si.max_sh_per_se = 2;
3089                 rdev->config.si.max_backends_per_se = 4;
3090                 rdev->config.si.max_texture_channel_caches = 8;
3091                 rdev->config.si.max_gprs = 256;
3092                 rdev->config.si.max_gs_threads = 32;
3093                 rdev->config.si.max_hw_contexts = 8;
3094
3095                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3096                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3097                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3098                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3099                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3100                 break;
3101         case CHIP_VERDE:
3102         default:
3103                 rdev->config.si.max_shader_engines = 1;
3104                 rdev->config.si.max_tile_pipes = 4;
3105                 rdev->config.si.max_cu_per_sh = 5;
3106                 rdev->config.si.max_sh_per_se = 2;
3107                 rdev->config.si.max_backends_per_se = 4;
3108                 rdev->config.si.max_texture_channel_caches = 4;
3109                 rdev->config.si.max_gprs = 256;
3110                 rdev->config.si.max_gs_threads = 32;
3111                 rdev->config.si.max_hw_contexts = 8;
3112
3113                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3114                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3115                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3116                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3117                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3118                 break;
3119         case CHIP_OLAND:
3120                 rdev->config.si.max_shader_engines = 1;
3121                 rdev->config.si.max_tile_pipes = 4;
3122                 rdev->config.si.max_cu_per_sh = 6;
3123                 rdev->config.si.max_sh_per_se = 1;
3124                 rdev->config.si.max_backends_per_se = 2;
3125                 rdev->config.si.max_texture_channel_caches = 4;
3126                 rdev->config.si.max_gprs = 256;
3127                 rdev->config.si.max_gs_threads = 16;
3128                 rdev->config.si.max_hw_contexts = 8;
3129
3130                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3131                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3132                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3133                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3134                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3135                 break;
3136         case CHIP_HAINAN:
3137                 rdev->config.si.max_shader_engines = 1;
3138                 rdev->config.si.max_tile_pipes = 4;
3139                 rdev->config.si.max_cu_per_sh = 5;
3140                 rdev->config.si.max_sh_per_se = 1;
3141                 rdev->config.si.max_backends_per_se = 1;
3142                 rdev->config.si.max_texture_channel_caches = 2;
3143                 rdev->config.si.max_gprs = 256;
3144                 rdev->config.si.max_gs_threads = 16;
3145                 rdev->config.si.max_hw_contexts = 8;
3146
3147                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3148                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3149                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3150                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3151                 gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
3152                 break;
3153         }
3154
3155         /* Initialize HDP */
3156         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3157                 WREG32((0x2c14 + j), 0x00000000);
3158                 WREG32((0x2c18 + j), 0x00000000);
3159                 WREG32((0x2c1c + j), 0x00000000);
3160                 WREG32((0x2c20 + j), 0x00000000);
3161                 WREG32((0x2c24 + j), 0x00000000);
3162         }
3163
3164         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3165
3166         evergreen_fix_pci_max_read_req_size(rdev);
3167
3168         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3169
3170         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3171         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3172
3173         rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3174         rdev->config.si.mem_max_burst_length_bytes = 256;
3175         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3176         rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3177         if (rdev->config.si.mem_row_size_in_kb > 4)
3178                 rdev->config.si.mem_row_size_in_kb = 4;
3179         /* XXX use MC settings? */
3180         rdev->config.si.shader_engine_tile_size = 32;
3181         rdev->config.si.num_gpus = 1;
3182         rdev->config.si.multi_gpu_tile_size = 64;
3183
3184         /* fix up row size */
3185         gb_addr_config &= ~ROW_SIZE_MASK;
3186         switch (rdev->config.si.mem_row_size_in_kb) {
3187         case 1:
3188         default:
3189                 gb_addr_config |= ROW_SIZE(0);
3190                 break;
3191         case 2:
3192                 gb_addr_config |= ROW_SIZE(1);
3193                 break;
3194         case 4:
3195                 gb_addr_config |= ROW_SIZE(2);
3196                 break;
3197         }
3198
3199         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3200          * not have bank info, so create a custom tiling dword.
3201          * bits 3:0   num_pipes
3202          * bits 7:4   num_banks
3203          * bits 11:8  group_size
3204          * bits 15:12 row_size
3205          */
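        /*
         * Worked example (hypothetical board): 8 tile pipes -> 0x3 in bits 3:0,
         * 16 banks -> 0x2 in bits 7:4, with the pipe-interleave and row-size
         * fields of gb_addr_config copied into bits 11:8 and 15:12 below.
         */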
3206         rdev->config.si.tile_config = 0;
3207         switch (rdev->config.si.num_tile_pipes) {
3208         case 1:
3209                 rdev->config.si.tile_config |= (0 << 0);
3210                 break;
3211         case 2:
3212                 rdev->config.si.tile_config |= (1 << 0);
3213                 break;
3214         case 4:
3215                 rdev->config.si.tile_config |= (2 << 0);
3216                 break;
3217         case 8:
3218         default:
3219                 /* XXX what about 12? */
3220                 rdev->config.si.tile_config |= (3 << 0);
3221                 break;
3222         }
3223         switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3224         case 0: /* four banks */
3225                 rdev->config.si.tile_config |= 0 << 4;
3226                 break;
3227         case 1: /* eight banks */
3228                 rdev->config.si.tile_config |= 1 << 4;
3229                 break;
3230         case 2: /* sixteen banks */
3231         default:
3232                 rdev->config.si.tile_config |= 2 << 4;
3233                 break;
3234         }
3235         rdev->config.si.tile_config |=
3236                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3237         rdev->config.si.tile_config |=
3238                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3239
3240         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3241         WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3242         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3243         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3244         WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3245         WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3246         if (rdev->has_uvd) {
3247                 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3248                 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3249                 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3250         }
3251
3252         si_tiling_mode_table_init(rdev);
3253
3254         si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3255                     rdev->config.si.max_sh_per_se,
3256                     rdev->config.si.max_backends_per_se);
3257
3258         si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3259                      rdev->config.si.max_sh_per_se,
3260                      rdev->config.si.max_cu_per_sh);
3261
3262         rdev->config.si.active_cus = 0;
3263         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
3264                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
3265                         rdev->config.si.active_cus +=
3266                                 hweight32(si_get_cu_active_bitmap(rdev, i, j));
3267                 }
3268         }
3269
3270         /* set HW defaults for 3D engine */
3271         WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3272                                      ROQ_IB2_START(0x2b)));
3273         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3274
3275         sx_debug_1 = RREG32(SX_DEBUG_1);
3276         WREG32(SX_DEBUG_1, sx_debug_1);
3277
3278         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3279
3280         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3281                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3282                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3283                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3284
3285         WREG32(VGT_NUM_INSTANCES, 1);
3286
3287         WREG32(CP_PERFMON_CNTL, 0);
3288
3289         WREG32(SQ_CONFIG, 0);
3290
3291         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3292                                           FORCE_EOV_MAX_REZ_CNT(255)));
3293
3294         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3295                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3296
3297         WREG32(VGT_GS_VERTEX_REUSE, 16);
3298         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3299
3300         WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3301         WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3302         WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3303         WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3304         WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3305         WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3306         WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3307         WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3308
3309         tmp = RREG32(HDP_MISC_CNTL);
3310         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3311         WREG32(HDP_MISC_CNTL, tmp);
3312
3313         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3314         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3315
3316         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3317
3318         udelay(50);
3319 }
3320
3321 /*
3322  * GPU scratch registers helpers function.
3323  */
3324 static void si_scratch_init(struct radeon_device *rdev)
3325 {
3326         int i;
3327
3328         rdev->scratch.num_reg = 7;
3329         rdev->scratch.reg_base = SCRATCH_REG0;
3330         for (i = 0; i < rdev->scratch.num_reg; i++) {
3331                 rdev->scratch.free[i] = true;
3332                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3333         }
3334 }
3335
3336 void si_fence_ring_emit(struct radeon_device *rdev,
3337                         struct radeon_fence *fence)
3338 {
3339         struct radeon_ring *ring = &rdev->ring[fence->ring];
3340         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3341
3342         /* flush read cache over gart */
3343         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3344         radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3345         radeon_ring_write(ring, 0);
3346         radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3347         radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3348                           PACKET3_TC_ACTION_ENA |
3349                           PACKET3_SH_KCACHE_ACTION_ENA |
3350                           PACKET3_SH_ICACHE_ACTION_ENA);
3351         radeon_ring_write(ring, 0xFFFFFFFF);
3352         radeon_ring_write(ring, 0);
3353         radeon_ring_write(ring, 10); /* poll interval */
3354         /* EVENT_WRITE_EOP - flush caches, send int */
3355         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3356         radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3357         radeon_ring_write(ring, lower_32_bits(addr));
3358         radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3359         radeon_ring_write(ring, fence->seq);
3360         radeon_ring_write(ring, 0);
3361 }
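/*
 * For illustration: the EVENT_WRITE_EOP packet above carries the fence
 * buffer GPU address split into low and high dwords, with fence->seq as the
 * payload; the DATA_SEL/INT_SEL fields ask the CP to write that value and
 * signal an interrupt once the cache-flush-and-invalidate event has retired.
 */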
3362
3363 /*
3364  * IB stuff
3365  */
3366 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3367 {
3368         struct radeon_ring *ring = &rdev->ring[ib->ring];
3369         unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3370         u32 header;
3371
3372         if (ib->is_const_ib) {
3373                 /* set switch buffer packet before const IB */
3374                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3375                 radeon_ring_write(ring, 0);
3376
3377                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3378         } else {
3379                 u32 next_rptr;
3380                 if (ring->rptr_save_reg) {
3381                         next_rptr = ring->wptr + 3 + 4 + 8;
3382                         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3383                         radeon_ring_write(ring, ((ring->rptr_save_reg -
3384                                                   PACKET3_SET_CONFIG_REG_START) >> 2));
3385                         radeon_ring_write(ring, next_rptr);
3386                 } else if (rdev->wb.enabled) {
3387                         next_rptr = ring->wptr + 5 + 4 + 8;
3388                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3389                         radeon_ring_write(ring, (1 << 8));
3390                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3391                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3392                         radeon_ring_write(ring, next_rptr);
3393                 }
3394
3395                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3396         }
3397
3398         radeon_ring_write(ring, header);
3399         radeon_ring_write(ring,
3400 #ifdef __BIG_ENDIAN
3401                           (2 << 0) |
3402 #endif
3403                           (ib->gpu_addr & 0xFFFFFFFC));
3404         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3405         radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
3406
3407         if (!ib->is_const_ib) {
3408                 /* flush read cache over gart for this vmid */
3409                 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3410                 radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3411                 radeon_ring_write(ring, vm_id);
3412                 radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3413                 radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3414                                   PACKET3_TC_ACTION_ENA |
3415                                   PACKET3_SH_KCACHE_ACTION_ENA |
3416                                   PACKET3_SH_ICACHE_ACTION_ENA);
3417                 radeon_ring_write(ring, 0xFFFFFFFF);
3418                 radeon_ring_write(ring, 0);
3419                 radeon_ring_write(ring, 10); /* poll interval */
3420         }
3421 }
3422
3423 /*
3424  * CP.
3425  */
3426 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3427 {
3428         if (enable)
3429                 WREG32(CP_ME_CNTL, 0);
3430         else {
3431                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3432                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3433                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3434                 WREG32(SCRATCH_UMSK, 0);
3435                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3436                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3437                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3438         }
3439         udelay(50);
3440 }
3441
3442 static int si_cp_load_microcode(struct radeon_device *rdev)
3443 {
3444         int i;
3445
3446         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3447                 return -EINVAL;
3448
3449         si_cp_enable(rdev, false);
3450
3451         if (rdev->new_fw) {
3452                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3453                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3454                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3455                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3456                 const struct gfx_firmware_header_v1_0 *me_hdr =
3457                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3458                 const __le32 *fw_data;
3459                 u32 fw_size;
3460
3461                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3462                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3463                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3464
3465                 /* PFP */
3466                 fw_data = (const __le32 *)
3467                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3468                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3469                 WREG32(CP_PFP_UCODE_ADDR, 0);
3470                 for (i = 0; i < fw_size; i++)
3471                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3472                 WREG32(CP_PFP_UCODE_ADDR, 0);
3473
3474                 /* CE */
3475                 fw_data = (const __le32 *)
3476                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3477                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3478                 WREG32(CP_CE_UCODE_ADDR, 0);
3479                 for (i = 0; i < fw_size; i++)
3480                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3481                 WREG32(CP_CE_UCODE_ADDR, 0);
3482
3483                 /* ME */
3484                 fw_data = (const __le32 *)
3485                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3486                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3487                 WREG32(CP_ME_RAM_WADDR, 0);
3488                 for (i = 0; i < fw_size; i++)
3489                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3490                 WREG32(CP_ME_RAM_WADDR, 0);
3491         } else {
3492                 const __be32 *fw_data;
3493
3494                 /* PFP */
3495                 fw_data = (const __be32 *)rdev->pfp_fw->data;
3496                 WREG32(CP_PFP_UCODE_ADDR, 0);
3497                 for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3498                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3499                 WREG32(CP_PFP_UCODE_ADDR, 0);
3500
3501                 /* CE */
3502                 fw_data = (const __be32 *)rdev->ce_fw->data;
3503                 WREG32(CP_CE_UCODE_ADDR, 0);
3504                 for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3505                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3506                 WREG32(CP_CE_UCODE_ADDR, 0);
3507
3508                 /* ME */
3509                 fw_data = (const __be32 *)rdev->me_fw->data;
3510                 WREG32(CP_ME_RAM_WADDR, 0);
3511                 for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3512                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3513                 WREG32(CP_ME_RAM_WADDR, 0);
3514         }
3515
3516         WREG32(CP_PFP_UCODE_ADDR, 0);
3517         WREG32(CP_CE_UCODE_ADDR, 0);
3518         WREG32(CP_ME_RAM_WADDR, 0);
3519         WREG32(CP_ME_RAM_RADDR, 0);
3520         return 0;
3521 }
3522
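/**
 * si_cp_start - initialize the gfx and compute rings
 *
 * @rdev: radeon_device pointer
 *
 * Send the ME_INITIALIZE packet, set up the CE partitions,
 * load the clear state, and clear the compute context state
 * on the two compute rings (SI).
 * Returns 0 on success, error on failure.
 */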
3523 static int si_cp_start(struct radeon_device *rdev)
3524 {
3525         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3526         int r, i;
3527
3528         r = radeon_ring_lock(rdev, ring, 7 + 4);
3529         if (r) {
3530                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3531                 return r;
3532         }
3533         /* init the CP */
3534         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3535         radeon_ring_write(ring, 0x1);
3536         radeon_ring_write(ring, 0x0);
3537         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3538         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3539         radeon_ring_write(ring, 0);
3540         radeon_ring_write(ring, 0);
3541
3542         /* init the CE partitions */
3543         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3544         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3545         radeon_ring_write(ring, 0xc000);
3546         radeon_ring_write(ring, 0xe000);
3547         radeon_ring_unlock_commit(rdev, ring, false);
3548
3549         si_cp_enable(rdev, true);
3550
3551         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3552         if (r) {
3553                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3554                 return r;
3555         }
3556
3557         /* setup clear context state */
3558         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3559         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3560
3561         for (i = 0; i < si_default_size; i++)
3562                 radeon_ring_write(ring, si_default_state[i]);
3563
3564         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3565         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3566
3567         /* set clear context state */
3568         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3569         radeon_ring_write(ring, 0);
3570
3571         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3572         radeon_ring_write(ring, 0x00000316);
3573         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3574         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3575
3576         radeon_ring_unlock_commit(rdev, ring, false);
3577
3578         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3579                 ring = &rdev->ring[i];
3580                 r = radeon_ring_lock(rdev, ring, 2);
3581                 r = radeon_ring_lock(rdev, ring, 2);
                     if (r) {
                             DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
                             return r;
                     }
3582                 /* clear the compute context state */
3583                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3584                 radeon_ring_write(ring, 0);
3585
3586                 radeon_ring_unlock_commit(rdev, ring, false);
3587         }
3588
3589         return 0;
3590 }
3591
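/**
 * si_cp_fini - tear down the CP rings
 *
 * @rdev: radeon_device pointer
 *
 * Disable the CP and free the gfx and compute rings along
 * with their rptr save scratch registers (SI).
 */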
3592 static void si_cp_fini(struct radeon_device *rdev)
3593 {
3594         struct radeon_ring *ring;
3595         si_cp_enable(rdev, false);
3596
3597         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3598         radeon_ring_fini(rdev, ring);
3599         radeon_scratch_free(rdev, ring->rptr_save_reg);
3600
3601         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3602         radeon_ring_fini(rdev, ring);
3603         radeon_scratch_free(rdev, ring->rptr_save_reg);
3604
3605         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3606         radeon_ring_fini(rdev, ring);
3607         radeon_scratch_free(rdev, ring->rptr_save_reg);
3608 }
3609
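/**
 * si_cp_resume - program and start the CP ring buffers
 *
 * @rdev: radeon_device pointer
 *
 * Program the size, base address and read/write pointers of
 * the gfx ring and the two compute rings, start the CP and
 * run the ring tests (SI).
 * Returns 0 on success, error on failure.
 */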
3610 static int si_cp_resume(struct radeon_device *rdev)
3611 {
3612         struct radeon_ring *ring;
3613         u32 tmp;
3614         u32 rb_bufsz;
3615         int r;
3616
3617         si_enable_gui_idle_interrupt(rdev, false);
3618
3619         WREG32(CP_SEM_WAIT_TIMER, 0x0);
3620         WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3621
3622         /* Set the write pointer delay */
3623         WREG32(CP_RB_WPTR_DELAY, 0);
3624
3625         WREG32(CP_DEBUG, 0);
3626         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3627
3628         /* ring 0 - compute and gfx */
3629         /* Set ring buffer size */
3630         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3631         rb_bufsz = order_base_2(ring->ring_size / 8);
3632         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3633 #ifdef __BIG_ENDIAN
3634         tmp |= BUF_SWAP_32BIT;
3635 #endif
3636         WREG32(CP_RB0_CNTL, tmp);
3637
3638         /* Initialize the ring buffer's read and write pointers */
3639         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3640         ring->wptr = 0;
3641         WREG32(CP_RB0_WPTR, ring->wptr);
3642
3643         /* set the wb address whether it's enabled or not */
3644         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3645         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3646
3647         if (rdev->wb.enabled)
3648                 WREG32(SCRATCH_UMSK, 0xff);
3649         else {
3650                 tmp |= RB_NO_UPDATE;
3651                 WREG32(SCRATCH_UMSK, 0);
3652         }
3653
3654         mdelay(1);
3655         WREG32(CP_RB0_CNTL, tmp);
3656
3657         WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3658
3659         /* ring1  - compute only */
3660         /* Set ring buffer size */
3661         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3662         rb_bufsz = order_base_2(ring->ring_size / 8);
3663         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3664 #ifdef __BIG_ENDIAN
3665         tmp |= BUF_SWAP_32BIT;
3666 #endif
3667         WREG32(CP_RB1_CNTL, tmp);
3668
3669         /* Initialize the ring buffer's read and write pointers */
3670         WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3671         ring->wptr = 0;
3672         WREG32(CP_RB1_WPTR, ring->wptr);
3673
3674         /* set the wb address whether it's enabled or not */
3675         WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3676         WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3677
3678         mdelay(1);
3679         WREG32(CP_RB1_CNTL, tmp);
3680
3681         WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3682
3683         /* ring2 - compute only */
3684         /* Set ring buffer size */
3685         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3686         rb_bufsz = order_base_2(ring->ring_size / 8);
3687         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3688 #ifdef __BIG_ENDIAN
3689         tmp |= BUF_SWAP_32BIT;
3690 #endif
3691         WREG32(CP_RB2_CNTL, tmp);
3692
3693         /* Initialize the ring buffer's read and write pointers */
3694         WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3695         ring->wptr = 0;
3696         WREG32(CP_RB2_WPTR, ring->wptr);
3697
3698         /* set the wb address whether it's enabled or not */
3699         WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3700         WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3701
3702         mdelay(1);
3703         WREG32(CP_RB2_CNTL, tmp);
3704
3705         WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3706
3707         /* start the rings */
3708         si_cp_start(rdev);
3709         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3710         rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3711         rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3712         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3713         if (r) {
3714                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3715                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3716                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3717                 return r;
3718         }
3719         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3720         if (r) {
3721                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3722         }
3723         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3724         if (r) {
3725                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3726         }
3727
3728         si_enable_gui_idle_interrupt(rdev, true);
3729
3730         if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3731                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3732
3733         return 0;
3734 }
3735
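/**
 * si_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return the relevant reset
 * mask to be used by si_gpu_soft_reset() (SI).
 * Returns a mask of the blocks to be reset.
 */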
3736 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3737 {
3738         u32 reset_mask = 0;
3739         u32 tmp;
3740
3741         /* GRBM_STATUS */
3742         tmp = RREG32(GRBM_STATUS);
3743         if (tmp & (PA_BUSY | SC_BUSY |
3744                    BCI_BUSY | SX_BUSY |
3745                    TA_BUSY | VGT_BUSY |
3746                    DB_BUSY | CB_BUSY |
3747                    GDS_BUSY | SPI_BUSY |
3748                    IA_BUSY | IA_BUSY_NO_DMA))
3749                 reset_mask |= RADEON_RESET_GFX;
3750
3751         if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3752                    CP_BUSY | CP_COHERENCY_BUSY))
3753                 reset_mask |= RADEON_RESET_CP;
3754
3755         if (tmp & GRBM_EE_BUSY)
3756                 reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3757
3758         /* GRBM_STATUS2 */
3759         tmp = RREG32(GRBM_STATUS2);
3760         if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3761                 reset_mask |= RADEON_RESET_RLC;
3762
3763         /* DMA_STATUS_REG 0 */
3764         tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3765         if (!(tmp & DMA_IDLE))
3766                 reset_mask |= RADEON_RESET_DMA;
3767
3768         /* DMA_STATUS_REG 1 */
3769         tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3770         if (!(tmp & DMA_IDLE))
3771                 reset_mask |= RADEON_RESET_DMA1;
3772
3773         /* SRBM_STATUS2 */
3774         tmp = RREG32(SRBM_STATUS2);
3775         if (tmp & DMA_BUSY)
3776                 reset_mask |= RADEON_RESET_DMA;
3777
3778         if (tmp & DMA1_BUSY)
3779                 reset_mask |= RADEON_RESET_DMA1;
3780
3781         /* SRBM_STATUS */
3782         tmp = RREG32(SRBM_STATUS);
3783
3784         if (tmp & IH_BUSY)
3785                 reset_mask |= RADEON_RESET_IH;
3786
3787         if (tmp & SEM_BUSY)
3788                 reset_mask |= RADEON_RESET_SEM;
3789
3790         if (tmp & GRBM_RQ_PENDING)
3791                 reset_mask |= RADEON_RESET_GRBM;
3792
3793         if (tmp & VMC_BUSY)
3794                 reset_mask |= RADEON_RESET_VMC;
3795
3796         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3797                    MCC_BUSY | MCD_BUSY))
3798                 reset_mask |= RADEON_RESET_MC;
3799
3800         if (evergreen_is_display_hung(rdev))
3801                 reset_mask |= RADEON_RESET_DISPLAY;
3802
3803         /* VM_L2_STATUS */
3804         tmp = RREG32(VM_L2_STATUS);
3805         if (tmp & L2_BUSY)
3806                 reset_mask |= RADEON_RESET_VMC;
3807
3808         /* Skip MC reset as it's most likely not hung, just busy */
3809         if (reset_mask & RADEON_RESET_MC) {
3810                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3811                 reset_mask &= ~RADEON_RESET_MC;
3812         }
3813
3814         return reset_mask;
3815 }
3816
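/**
 * si_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask (SI).
 */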
3817 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3818 {
3819         struct evergreen_mc_save save;
3820         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3821         u32 tmp;
3822
3823         if (reset_mask == 0)
3824                 return;
3825
3826         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3827
3828         evergreen_print_gpu_status_regs(rdev);
3829         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3830                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3831         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3832                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3833
3834         /* disable PG/CG */
3835         si_fini_pg(rdev);
3836         si_fini_cg(rdev);
3837
3838         /* stop the rlc */
3839         si_rlc_stop(rdev);
3840
3841         /* Disable CP parsing/prefetching */
3842         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3843
3844         if (reset_mask & RADEON_RESET_DMA) {
3845                 /* dma0 */
3846                 tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3847                 tmp &= ~DMA_RB_ENABLE;
3848                 WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3849         }
3850         if (reset_mask & RADEON_RESET_DMA1) {
3851                 /* dma1 */
3852                 tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3853                 tmp &= ~DMA_RB_ENABLE;
3854                 WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3855         }
3856
3857         udelay(50);
3858
3859         evergreen_mc_stop(rdev, &save);
3860         if (evergreen_mc_wait_for_idle(rdev)) {
3861                 dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
3862         }
3863
3864         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3865                 grbm_soft_reset = SOFT_RESET_CB |
3866                         SOFT_RESET_DB |
3867                         SOFT_RESET_GDS |
3868                         SOFT_RESET_PA |
3869                         SOFT_RESET_SC |
3870                         SOFT_RESET_BCI |
3871                         SOFT_RESET_SPI |
3872                         SOFT_RESET_SX |
3873                         SOFT_RESET_TC |
3874                         SOFT_RESET_TA |
3875                         SOFT_RESET_VGT |
3876                         SOFT_RESET_IA;
3877         }
3878
3879         if (reset_mask & RADEON_RESET_CP) {
3880                 grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3881
3882                 srbm_soft_reset |= SOFT_RESET_GRBM;
3883         }
3884
3885         if (reset_mask & RADEON_RESET_DMA)
3886                 srbm_soft_reset |= SOFT_RESET_DMA;
3887
3888         if (reset_mask & RADEON_RESET_DMA1)
3889                 srbm_soft_reset |= SOFT_RESET_DMA1;
3890
3891         if (reset_mask & RADEON_RESET_DISPLAY)
3892                 srbm_soft_reset |= SOFT_RESET_DC;
3893
3894         if (reset_mask & RADEON_RESET_RLC)
3895                 grbm_soft_reset |= SOFT_RESET_RLC;
3896
3897         if (reset_mask & RADEON_RESET_SEM)
3898                 srbm_soft_reset |= SOFT_RESET_SEM;
3899
3900         if (reset_mask & RADEON_RESET_IH)
3901                 srbm_soft_reset |= SOFT_RESET_IH;
3902
3903         if (reset_mask & RADEON_RESET_GRBM)
3904                 srbm_soft_reset |= SOFT_RESET_GRBM;
3905
3906         if (reset_mask & RADEON_RESET_VMC)
3907                 srbm_soft_reset |= SOFT_RESET_VMC;
3908
3909         if (reset_mask & RADEON_RESET_MC)
3910                 srbm_soft_reset |= SOFT_RESET_MC;
3911
3912         if (grbm_soft_reset) {
3913                 tmp = RREG32(GRBM_SOFT_RESET);
3914                 tmp |= grbm_soft_reset;
3915                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3916                 WREG32(GRBM_SOFT_RESET, tmp);
3917                 tmp = RREG32(GRBM_SOFT_RESET);
3918
3919                 udelay(50);
3920
3921                 tmp &= ~grbm_soft_reset;
3922                 WREG32(GRBM_SOFT_RESET, tmp);
3923                 tmp = RREG32(GRBM_SOFT_RESET);
3924         }
3925
3926         if (srbm_soft_reset) {
3927                 tmp = RREG32(SRBM_SOFT_RESET);
3928                 tmp |= srbm_soft_reset;
3929                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3930                 WREG32(SRBM_SOFT_RESET, tmp);
3931                 tmp = RREG32(SRBM_SOFT_RESET);
3932
3933                 udelay(50);
3934
3935                 tmp &= ~srbm_soft_reset;
3936                 WREG32(SRBM_SOFT_RESET, tmp);
3937                 tmp = RREG32(SRBM_SOFT_RESET);
3938         }
3939
3940         /* Wait a little for things to settle down */
3941         udelay(50);
3942
3943         evergreen_mc_resume(rdev, &save);
3944         udelay(50);
3945
3946         evergreen_print_gpu_status_regs(rdev);
3947 }
3948
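/**
 * si_set_clk_bypass_mode - switch the clocks to bypass mode
 *
 * @rdev: radeon_device pointer
 *
 * Put the SPLL into bypass and deselect the MPLL as the memory
 * clock source in preparation for a pci config reset (SI).
 */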
3949 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3950 {
3951         u32 tmp, i;
3952
3953         tmp = RREG32(CG_SPLL_FUNC_CNTL);
3954         tmp |= SPLL_BYPASS_EN;
3955         WREG32(CG_SPLL_FUNC_CNTL, tmp);
3956
3957         tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3958         tmp |= SPLL_CTLREQ_CHG;
3959         WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3960
3961         for (i = 0; i < rdev->usec_timeout; i++) {
3962                 if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
3963                         break;
3964                 udelay(1);
3965         }
3966
3967         tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3968         tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
3969         WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3970
3971         tmp = RREG32(MPLL_CNTL_MODE);
3972         tmp &= ~MPLL_MCLK_SEL;
3973         WREG32(MPLL_CNTL_MODE, tmp);
3974 }
3975
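/**
 * si_spll_powerdown - power down the SPLL
 *
 * @rdev: radeon_device pointer
 *
 * Take software control of the SPLL, put it into reset and
 * sleep, then release software control (SI).
 */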
3976 static void si_spll_powerdown(struct radeon_device *rdev)
3977 {
3978         u32 tmp;
3979
3980         tmp = RREG32(SPLL_CNTL_MODE);
3981         tmp |= SPLL_SW_DIR_CONTROL;
3982         WREG32(SPLL_CNTL_MODE, tmp);
3983
3984         tmp = RREG32(CG_SPLL_FUNC_CNTL);
3985         tmp |= SPLL_RESET;
3986         WREG32(CG_SPLL_FUNC_CNTL, tmp);
3987
3988         tmp = RREG32(CG_SPLL_FUNC_CNTL);
3989         tmp |= SPLL_SLEEP;
3990         WREG32(CG_SPLL_FUNC_CNTL, tmp);
3991
3992         tmp = RREG32(SPLL_CNTL_MODE);
3993         tmp &= ~SPLL_SW_DIR_CONTROL;
3994         WREG32(SPLL_CNTL_MODE, tmp);
3995 }
3996
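/**
 * si_gpu_pci_config_reset - reset the asic via the pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Halt the CP and DMA engines, stop memory access, switch the
 * clocks to bypass, power down the SPLL, disable bus mastering
 * and trigger a pci config reset, then wait for the asic to
 * come back (SI).
 */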
3997 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
3998 {
3999         struct evergreen_mc_save save;
4000         u32 tmp, i;
4001
4002         dev_info(rdev->dev, "GPU pci config reset\n");
4003
4004         /* disable dpm? */
4005
4006         /* disable cg/pg */
4007         si_fini_pg(rdev);
4008         si_fini_cg(rdev);
4009
4010         /* Disable CP parsing/prefetching */
4011         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4012         /* dma0 */
4013         tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
4014         tmp &= ~DMA_RB_ENABLE;
4015         WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
4016         /* dma1 */
4017         tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
4018         tmp &= ~DMA_RB_ENABLE;
4019         WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
4020         /* XXX other engines? */
4021
4022         /* halt the rlc, disable cp internal ints */
4023         si_rlc_stop(rdev);
4024
4025         udelay(50);
4026
4027         /* disable mem access */
4028         evergreen_mc_stop(rdev, &save);
4029         if (evergreen_mc_wait_for_idle(rdev)) {
4030                 dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
4031         }
4032
4033         /* set mclk/sclk to bypass */
4034         si_set_clk_bypass_mode(rdev);
4035         /* powerdown spll */
4036         si_spll_powerdown(rdev);
4037         /* disable BM */
4038         pci_clear_master(rdev->pdev);
4039         /* reset */
4040         radeon_pci_config_reset(rdev);
4041         /* wait for asic to come out of reset */
4042         for (i = 0; i < rdev->usec_timeout; i++) {
4043                 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
4044                         break;
4045                 udelay(1);
4046         }
4047 }
4048
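/**
 * si_asic_reset - reset the asic
 *
 * @rdev: radeon_device pointer
 *
 * Try a soft reset of the blocks that appear to be hung; if
 * that is not enough and hard resets are allowed, fall back
 * to a pci config reset (SI).
 * Returns 0.
 */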
4049 int si_asic_reset(struct radeon_device *rdev)
4050 {
4051         u32 reset_mask;
4052
4053         reset_mask = si_gpu_check_soft_reset(rdev);
4054
4055         if (reset_mask)
4056                 r600_set_bios_scratch_engine_hung(rdev, true);
4057
4058         /* try soft reset */
4059         si_gpu_soft_reset(rdev, reset_mask);
4060
4061         reset_mask = si_gpu_check_soft_reset(rdev);
4062
4063         /* try pci config reset */
4064         if (reset_mask && radeon_hard_reset)
4065                 si_gpu_pci_config_reset(rdev);
4066
4067         reset_mask = si_gpu_check_soft_reset(rdev);
4068
4069         if (!reset_mask)
4070                 r600_set_bios_scratch_engine_hung(rdev, false);
4071
4072         return 0;
4073 }
4074
4075 /**
4076  * si_gfx_is_lockup - Check if the GFX engine is locked up
4077  *
4078  * @rdev: radeon_device pointer
4079  * @ring: radeon_ring structure holding ring information
4080  *
4081  * Check if the GFX engine is locked up.
4082  * Returns true if the engine appears to be locked up, false if not.
4083  */
4084 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4085 {
4086         u32 reset_mask = si_gpu_check_soft_reset(rdev);
4087
4088         if (!(reset_mask & (RADEON_RESET_GFX |
4089                             RADEON_RESET_COMPUTE |
4090                             RADEON_RESET_CP))) {
4091                 radeon_ring_lockup_update(rdev, ring);
4092                 return false;
4093         }
4094         return radeon_ring_test_lockup(rdev, ring);
4095 }
4096
4097 /* MC */
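/**
 * si_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram in the GPU's address space and
 * program the system aperture and HDP registers (SI).
 */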
4098 static void si_mc_program(struct radeon_device *rdev)
4099 {
4100         struct evergreen_mc_save save;
4101         u32 tmp;
4102         int i, j;
4103
4104         /* Initialize HDP */
4105         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4106                 WREG32((0x2c14 + j), 0x00000000);
4107                 WREG32((0x2c18 + j), 0x00000000);
4108                 WREG32((0x2c1c + j), 0x00000000);
4109                 WREG32((0x2c20 + j), 0x00000000);
4110                 WREG32((0x2c24 + j), 0x00000000);
4111         }
4112         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4113
4114         evergreen_mc_stop(rdev, &save);
4115         if (radeon_mc_wait_for_idle(rdev)) {
4116                 dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
4117         }
4118         if (!ASIC_IS_NODCE(rdev))
4119                 /* Lock out access through the VGA aperture */
4120                 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4121         /* Update configuration */
4122         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4123                rdev->mc.vram_start >> 12);
4124         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4125                rdev->mc.vram_end >> 12);
4126         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4127                rdev->vram_scratch.gpu_addr >> 12);
4128         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4129         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4130         WREG32(MC_VM_FB_LOCATION, tmp);
4131         /* XXX double check these! */
4132         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4133         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4134         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4135         WREG32(MC_VM_AGP_BASE, 0);
4136         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4137         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4138         if (radeon_mc_wait_for_idle(rdev)) {
4139                 dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
4140         }
4141         evergreen_mc_resume(rdev, &save);
4142         if (!ASIC_IS_NODCE(rdev)) {
4143                 /* we need to own VRAM, so turn off the VGA renderer here
4144                  * to stop it overwriting our objects */
4145                 rv515_vga_render_disable(rdev);
4146         }
4147 }
4148
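/**
 * si_vram_gtt_location - figure out the start of vram and gtt
 *
 * @rdev: radeon_device pointer
 * @mc: memory controller structure holding memory information
 *
 * Place vram and gtt in the GPU's address space, limiting vram
 * so that at least 1024M is left for gtt (SI).
 */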
4149 void si_vram_gtt_location(struct radeon_device *rdev,
4150                           struct radeon_mc *mc)
4151 {
4152         if (mc->mc_vram_size > 0xFFC0000000ULL) {
4153                 /* leave room for at least 1024M GTT */
4154                 dev_warn(rdev->dev, "limiting VRAM\n");
4155                 mc->real_vram_size = 0xFFC0000000ULL;
4156                 mc->mc_vram_size = 0xFFC0000000ULL;
4157         }
4158         radeon_vram_location(rdev, &rdev->mc, 0);
4159         rdev->mc.gtt_base_align = 0;
4160         radeon_gtt_location(rdev, mc);
4161 }
4162
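/**
 * si_mc_init - initialize the memory controller driver state
 *
 * @rdev: radeon_device pointer
 *
 * Look up the vram width, aperture and vram size and place
 * vram and gtt in the GPU's address space (SI).
 * Returns 0 for success.
 */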
4163 static int si_mc_init(struct radeon_device *rdev)
4164 {
4165         u32 tmp;
4166         int chansize, numchan;
4167
4168         /* Get VRAM information */
4169         rdev->mc.vram_is_ddr = true;
4170         tmp = RREG32(MC_ARB_RAMCFG);
4171         if (tmp & CHANSIZE_OVERRIDE) {
4172                 chansize = 16;
4173         } else if (tmp & CHANSIZE_MASK) {
4174                 chansize = 64;
4175         } else {
4176                 chansize = 32;
4177         }
4178         tmp = RREG32(MC_SHARED_CHMAP);
4179         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4180         case 0:
4181         default:
4182                 numchan = 1;
4183                 break;
4184         case 1:
4185                 numchan = 2;
4186                 break;
4187         case 2:
4188                 numchan = 4;
4189                 break;
4190         case 3:
4191                 numchan = 8;
4192                 break;
4193         case 4:
4194                 numchan = 3;
4195                 break;
4196         case 5:
4197                 numchan = 6;
4198                 break;
4199         case 6:
4200                 numchan = 10;
4201                 break;
4202         case 7:
4203                 numchan = 12;
4204                 break;
4205         case 8:
4206                 numchan = 16;
4207                 break;
4208         }
4209         rdev->mc.vram_width = numchan * chansize;
4210         /* Could aper size report 0 ? */
4211         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4212         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4213         /* size in MB on si */
4214         tmp = RREG32(CONFIG_MEMSIZE);
4215         /* some boards may have garbage in the upper 16 bits */
4216         if (tmp & 0xffff0000) {
4217                 DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4218                 if (tmp & 0xffff)
4219                         tmp &= 0xffff;
4220         }
4221         rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4222         rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4223         rdev->mc.visible_vram_size = rdev->mc.aper_size;
4224         si_vram_gtt_location(rdev, &rdev->mc);
4225         radeon_update_bandwidth_info(rdev);
4226
4227         return 0;
4228 }
4229
4230 /*
4231  * GART
4232  */
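/**
 * si_pcie_gart_tlb_flush - flush the GART TLB
 *
 * @rdev: radeon_device pointer
 *
 * Flush the HDP cache and invalidate the TLB for the GART
 * (VM context 0) (SI).
 */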
4233 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4234 {
4235         /* flush hdp cache */
4236         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4237
4238         /* bits 0-15 are the VM contexts 0-15 */
4239         WREG32(VM_INVALIDATE_REQUEST, 1);
4240 }
4241
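/**
 * si_pcie_gart_enable - setup and enable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Pin the GART page table in vram, program the L1 TLB and L2
 * cache, set up VM context 0 for the GART and contexts 1-15
 * for the VMs, then flush the TLB (SI).
 * Returns 0 on success, error on failure.
 */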
4242 static int si_pcie_gart_enable(struct radeon_device *rdev)
4243 {
4244         int r, i;
4245
4246         if (rdev->gart.robj == NULL) {
4247                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4248                 return -EINVAL;
4249         }
4250         r = radeon_gart_table_vram_pin(rdev);
4251         if (r)
4252                 return r;
4253         /* Setup TLB control */
4254         WREG32(MC_VM_MX_L1_TLB_CNTL,
4255                (0xA << 7) |
4256                ENABLE_L1_TLB |
4257                ENABLE_L1_FRAGMENT_PROCESSING |
4258                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4259                ENABLE_ADVANCED_DRIVER_MODEL |
4260                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4261         /* Setup L2 cache */
4262         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4263                ENABLE_L2_FRAGMENT_PROCESSING |
4264                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4265                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4266                EFFECTIVE_L2_QUEUE_SIZE(7) |
4267                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4268         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4269         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4270                BANK_SELECT(4) |
4271                L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4272         /* setup context0 */
4273         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4274         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4275         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4276         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4277                         (u32)(rdev->dummy_page.addr >> 12));
4278         WREG32(VM_CONTEXT0_CNTL2, 0);
4279         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4280                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4281
4282         WREG32(0x15D4, 0);
4283         WREG32(0x15D8, 0);
4284         WREG32(0x15DC, 0);
4285
4286         /* empty context1-15 */
4287         /* set vm size, must be a multiple of 4 */
4288         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4289         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4290         /* Assign the pt base to something valid for now; the pts used for
4291          * the VMs are determined by the application and set up and assigned
4292          * on the fly in the vm part of radeon_gart.c
4293          */
4294         for (i = 1; i < 16; i++) {
4295                 if (i < 8)
4296                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4297                                rdev->vm_manager.saved_table_addr[i]);
4298                 else
4299                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4300                                rdev->vm_manager.saved_table_addr[i]);
4301         }
4302
4303         /* enable context1-15 */
4304         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4305                (u32)(rdev->dummy_page.addr >> 12));
4306         WREG32(VM_CONTEXT1_CNTL2, 4);
4307         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4308                                 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4309                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4310                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4311                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4312                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4313                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4314                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4315                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4316                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4317                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4318                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4319                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4320                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4321
4322         si_pcie_gart_tlb_flush(rdev);
4323         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4324                  (unsigned)(rdev->mc.gtt_size >> 20),
4325                  (unsigned long long)rdev->gart.table_addr);
4326         rdev->gart.ready = true;
4327         return 0;
4328 }
4329
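/**
 * si_pcie_gart_disable - disable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Save the page table base addresses of contexts 1-15, disable
 * all VM contexts and unpin the GART page table (SI).
 */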
4330 static void si_pcie_gart_disable(struct radeon_device *rdev)
4331 {
4332         unsigned i;
4333
4334         for (i = 1; i < 16; ++i) {
4335                 uint32_t reg;
4336                 if (i < 8)
4337                         reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4338                 else
4339                         reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4340                 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4341         }
4342
4343         /* Disable all tables */
4344         WREG32(VM_CONTEXT0_CNTL, 0);
4345         WREG32(VM_CONTEXT1_CNTL, 0);
4346         /* Setup TLB control */
4347         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4348                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4349         /* Setup L2 cache */
4350         WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4351                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4352                EFFECTIVE_L2_QUEUE_SIZE(7) |
4353                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4354         WREG32(VM_L2_CNTL2, 0);
4355         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4356                L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4357         radeon_gart_table_vram_unpin(rdev);
4358 }
4359
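/**
 * si_pcie_gart_fini - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disable the GART and free the page table and GART
 * structures (SI).
 */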
4360 static void si_pcie_gart_fini(struct radeon_device *rdev)
4361 {
4362         si_pcie_gart_disable(rdev);
4363         radeon_gart_table_vram_free(rdev);
4364         radeon_gart_fini(rdev);
4365 }
4366
4367 /* vm parser */
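/**
 * si_vm_reg_valid - check if a register can be written from a VM IB
 *
 * @reg: register offset to check
 *
 * Context registers are always allowed; config registers must
 * be on the whitelist below (SI).
 * Returns true if the register is allowed, false otherwise.
 */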
4368 static bool si_vm_reg_valid(u32 reg)
4369 {
4370         /* context regs are fine */
4371         if (reg >= 0x28000)
4372                 return true;
4373
4374         /* check config regs */
4375         switch (reg) {
4376         case GRBM_GFX_INDEX:
4377         case CP_STRMOUT_CNTL:
4378         case VGT_VTX_VECT_EJECT_REG:
4379         case VGT_CACHE_INVALIDATION:
4380         case VGT_ESGS_RING_SIZE:
4381         case VGT_GSVS_RING_SIZE:
4382         case VGT_GS_VERTEX_REUSE:
4383         case VGT_PRIMITIVE_TYPE:
4384         case VGT_INDEX_TYPE:
4385         case VGT_NUM_INDICES:
4386         case VGT_NUM_INSTANCES:
4387         case VGT_TF_RING_SIZE:
4388         case VGT_HS_OFFCHIP_PARAM:
4389         case VGT_TF_MEMORY_BASE:
4390         case PA_CL_ENHANCE:
4391         case PA_SU_LINE_STIPPLE_VALUE:
4392         case PA_SC_LINE_STIPPLE_STATE:
4393         case PA_SC_ENHANCE:
4394         case SQC_CACHES:
4395         case SPI_STATIC_THREAD_MGMT_1:
4396         case SPI_STATIC_THREAD_MGMT_2:
4397         case SPI_STATIC_THREAD_MGMT_3:
4398         case SPI_PS_MAX_WAVE_ID:
4399         case SPI_CONFIG_CNTL:
4400         case SPI_CONFIG_CNTL_1:
4401         case TA_CNTL_AUX:
4402                 return true;
4403         default:
4404                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4405                 return false;
4406         }
4407 }
4408
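/**
 * si_vm_packet3_ce_check - validate a type 3 packet for the CE
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer data
 * @pkt: packet to check
 *
 * Only allow CE packet3 opcodes that are safe in a const IB (SI).
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */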
4409 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4410                                   u32 *ib, struct radeon_cs_packet *pkt)
4411 {
4412         switch (pkt->opcode) {
4413         case PACKET3_NOP:
4414         case PACKET3_SET_BASE:
4415         case PACKET3_SET_CE_DE_COUNTERS:
4416         case PACKET3_LOAD_CONST_RAM:
4417         case PACKET3_WRITE_CONST_RAM:
4418         case PACKET3_WRITE_CONST_RAM_OFFSET:
4419         case PACKET3_DUMP_CONST_RAM:
4420         case PACKET3_INCREMENT_CE_COUNTER:
4421         case PACKET3_WAIT_ON_DE_COUNTER:
4422         case PACKET3_CE_WRITE:
4423                 break;
4424         default:
4425                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4426                 return -EINVAL;
4427         }
4428         return 0;
4429 }
4430
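/**
 * si_vm_packet3_cp_dma_check - validate a CP_DMA packet
 *
 * @ib: indirect buffer data
 * @idx: index of the first dword after the packet header
 *
 * If the CP_DMA source or destination address space is a
 * register, make sure every register touched is on the
 * whitelist (SI).
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */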
4431 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4432 {
4433         u32 start_reg, reg, i;
4434         u32 command = ib[idx + 4];
4435         u32 info = ib[idx + 1];
4436         u32 idx_value = ib[idx];
4437         if (command & PACKET3_CP_DMA_CMD_SAS) {
4438                 /* src address space is register */
4439                 if (((info & 0x60000000) >> 29) == 0) {
4440                         start_reg = idx_value << 2;
4441                         if (command & PACKET3_CP_DMA_CMD_SAIC) {
4442                                 reg = start_reg;
4443                                 if (!si_vm_reg_valid(reg)) {
4444                                         DRM_ERROR("CP DMA Bad SRC register\n");
4445                                         return -EINVAL;
4446                                 }
4447                         } else {
4448                                 for (i = 0; i < (command & 0x1fffff); i++) {
4449                                         reg = start_reg + (4 * i);
4450                                         if (!si_vm_reg_valid(reg)) {
4451                                                 DRM_ERROR("CP DMA Bad SRC register\n");
4452                                                 return -EINVAL;
4453                                         }
4454                                 }
4455                         }
4456                 }
4457         }
4458         if (command & PACKET3_CP_DMA_CMD_DAS) {
4459                 /* dst address space is register */
4460                 if (((info & 0x00300000) >> 20) == 0) {
4461                         start_reg = ib[idx + 2];
4462                         if (command & PACKET3_CP_DMA_CMD_DAIC) {
4463                                 reg = start_reg;
4464                                 if (!si_vm_reg_valid(reg)) {
4465                                         DRM_ERROR("CP DMA Bad DST register\n");
4466                                         return -EINVAL;
4467                                 }
4468                         } else {
4469                                 for (i = 0; i < (command & 0x1fffff); i++) {
4470                                         reg = start_reg + (4 * i);
4471                                         if (!si_vm_reg_valid(reg)) {
4472                                                 DRM_ERROR("CP DMA Bad DST register\n");
4473                                                 return -EINVAL;
4474                                         }
4475                                 }
4476                         }
4477                 }
4478         }
4479         return 0;
4480 }
4481
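/**
 * si_vm_packet3_gfx_check - validate a type 3 packet for the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer data
 * @pkt: packet to check
 *
 * Check the packet3 opcode and, for packets that can write
 * registers, verify that only whitelisted registers are
 * touched (SI).
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */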
4482 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4483                                    u32 *ib, struct radeon_cs_packet *pkt)
4484 {
4485         int r;
4486         u32 idx = pkt->idx + 1;
4487         u32 idx_value = ib[idx];
4488         u32 start_reg, end_reg, reg, i;
4489
4490         switch (pkt->opcode) {
4491         case PACKET3_NOP:
4492         case PACKET3_SET_BASE:
4493         case PACKET3_CLEAR_STATE:
4494         case PACKET3_INDEX_BUFFER_SIZE:
4495         case PACKET3_DISPATCH_DIRECT:
4496         case PACKET3_DISPATCH_INDIRECT:
4497         case PACKET3_ALLOC_GDS:
4498         case PACKET3_WRITE_GDS_RAM:
4499         case PACKET3_ATOMIC_GDS:
4500         case PACKET3_ATOMIC:
4501         case PACKET3_OCCLUSION_QUERY:
4502         case PACKET3_SET_PREDICATION:
4503         case PACKET3_COND_EXEC:
4504         case PACKET3_PRED_EXEC:
4505         case PACKET3_DRAW_INDIRECT:
4506         case PACKET3_DRAW_INDEX_INDIRECT:
4507         case PACKET3_INDEX_BASE:
4508         case PACKET3_DRAW_INDEX_2:
4509         case PACKET3_CONTEXT_CONTROL:
4510         case PACKET3_INDEX_TYPE:
4511         case PACKET3_DRAW_INDIRECT_MULTI:
4512         case PACKET3_DRAW_INDEX_AUTO:
4513         case PACKET3_DRAW_INDEX_IMMD:
4514         case PACKET3_NUM_INSTANCES:
4515         case PACKET3_DRAW_INDEX_MULTI_AUTO:
4516         case PACKET3_STRMOUT_BUFFER_UPDATE:
4517         case PACKET3_DRAW_INDEX_OFFSET_2:
4518         case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4519         case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4520         case PACKET3_MPEG_INDEX:
4521         case PACKET3_WAIT_REG_MEM:
4522         case PACKET3_MEM_WRITE:
4523         case PACKET3_PFP_SYNC_ME:
4524         case PACKET3_SURFACE_SYNC:
4525         case PACKET3_EVENT_WRITE:
4526         case PACKET3_EVENT_WRITE_EOP:
4527         case PACKET3_EVENT_WRITE_EOS:
4528         case PACKET3_SET_CONTEXT_REG:
4529         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4530         case PACKET3_SET_SH_REG:
4531         case PACKET3_SET_SH_REG_OFFSET:
4532         case PACKET3_INCREMENT_DE_COUNTER:
4533         case PACKET3_WAIT_ON_CE_COUNTER:
4534         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4535         case PACKET3_ME_WRITE:
4536                 break;
4537         case PACKET3_COPY_DATA:
4538                 if ((idx_value & 0xf00) == 0) {
4539                         reg = ib[idx + 3] * 4;
4540                         if (!si_vm_reg_valid(reg))
4541                                 return -EINVAL;
4542                 }
4543                 break;
4544         case PACKET3_WRITE_DATA:
4545                 if ((idx_value & 0xf00) == 0) {
4546                         start_reg = ib[idx + 1] * 4;
4547                         if (idx_value & 0x10000) {
4548                                 if (!si_vm_reg_valid(start_reg))
4549                                         return -EINVAL;
4550                         } else {
4551                                 for (i = 0; i < (pkt->count - 2); i++) {
4552                                         reg = start_reg + (4 * i);
4553                                         if (!si_vm_reg_valid(reg))
4554                                                 return -EINVAL;
4555                                 }
4556                         }
4557                 }
4558                 break;
4559         case PACKET3_COND_WRITE:
4560                 if (idx_value & 0x100) {
4561                         reg = ib[idx + 5] * 4;
4562                         if (!si_vm_reg_valid(reg))
4563                                 return -EINVAL;
4564                 }
4565                 break;
4566         case PACKET3_COPY_DW:
4567                 if (idx_value & 0x2) {
4568                         reg = ib[idx + 3] * 4;
4569                         if (!si_vm_reg_valid(reg))
4570                                 return -EINVAL;
4571                 }
4572                 break;
4573         case PACKET3_SET_CONFIG_REG:
4574                 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4575                 end_reg = 4 * pkt->count + start_reg - 4;
4576                 if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4577                     (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4578                     (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4579                         DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4580                         return -EINVAL;
4581                 }
4582                 for (i = 0; i < pkt->count; i++) {
4583                         reg = start_reg + (4 * i);
4584                         if (!si_vm_reg_valid(reg))
4585                                 return -EINVAL;
4586                 }
4587                 break;
4588         case PACKET3_CP_DMA:
4589                 r = si_vm_packet3_cp_dma_check(ib, idx);
4590                 if (r)
4591                         return r;
4592                 break;
4593         default:
4594                 DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4595                 return -EINVAL;
4596         }
4597         return 0;
4598 }
4599
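/**
 * si_vm_packet3_compute_check - validate a type 3 packet for the compute rings
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer data
 * @pkt: packet to check
 *
 * Check the packet3 opcode and, for packets that can write
 * registers, verify that only whitelisted registers are
 * touched (SI).
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */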
4600 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4601                                        u32 *ib, struct radeon_cs_packet *pkt)
4602 {
4603         int r;
4604         u32 idx = pkt->idx + 1;
4605         u32 idx_value = ib[idx];
4606         u32 start_reg, reg, i;
4607
4608         switch (pkt->opcode) {
4609         case PACKET3_NOP:
4610         case PACKET3_SET_BASE:
4611         case PACKET3_CLEAR_STATE:
4612         case PACKET3_DISPATCH_DIRECT:
4613         case PACKET3_DISPATCH_INDIRECT:
4614         case PACKET3_ALLOC_GDS:
4615         case PACKET3_WRITE_GDS_RAM:
4616         case PACKET3_ATOMIC_GDS:
4617         case PACKET3_ATOMIC:
4618         case PACKET3_OCCLUSION_QUERY:
4619         case PACKET3_SET_PREDICATION:
4620         case PACKET3_COND_EXEC:
4621         case PACKET3_PRED_EXEC:
4622         case PACKET3_CONTEXT_CONTROL:
4623         case PACKET3_STRMOUT_BUFFER_UPDATE:
4624         case PACKET3_WAIT_REG_MEM:
4625         case PACKET3_MEM_WRITE:
4626         case PACKET3_PFP_SYNC_ME:
4627         case PACKET3_SURFACE_SYNC:
4628         case PACKET3_EVENT_WRITE:
4629         case PACKET3_EVENT_WRITE_EOP:
4630         case PACKET3_EVENT_WRITE_EOS:
4631         case PACKET3_SET_CONTEXT_REG:
4632         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4633         case PACKET3_SET_SH_REG:
4634         case PACKET3_SET_SH_REG_OFFSET:
4635         case PACKET3_INCREMENT_DE_COUNTER:
4636         case PACKET3_WAIT_ON_CE_COUNTER:
4637         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4638         case PACKET3_ME_WRITE:
4639                 break;
4640         case PACKET3_COPY_DATA:
4641                 if ((idx_value & 0xf00) == 0) {
4642                         reg = ib[idx + 3] * 4;
4643                         if (!si_vm_reg_valid(reg))
4644                                 return -EINVAL;
4645                 }
4646                 break;
4647         case PACKET3_WRITE_DATA:
4648                 if ((idx_value & 0xf00) == 0) {
4649                         start_reg = ib[idx + 1] * 4;
4650                         if (idx_value & 0x10000) {
4651                                 if (!si_vm_reg_valid(start_reg))
4652                                         return -EINVAL;
4653                         } else {
4654                                 for (i = 0; i < (pkt->count - 2); i++) {
4655                                         reg = start_reg + (4 * i);
4656                                         if (!si_vm_reg_valid(reg))
4657                                                 return -EINVAL;
4658                                 }
4659                         }
4660                 }
4661                 break;
4662         case PACKET3_COND_WRITE:
4663                 if (idx_value & 0x100) {
4664                         reg = ib[idx + 5] * 4;
4665                         if (!si_vm_reg_valid(reg))
4666                                 return -EINVAL;
4667                 }
4668                 break;
4669         case PACKET3_COPY_DW:
4670                 if (idx_value & 0x2) {
4671                         reg = ib[idx + 3] * 4;
4672                         if (!si_vm_reg_valid(reg))
4673                                 return -EINVAL;
4674                 }
4675                 break;
4676         case PACKET3_CP_DMA:
4677                 r = si_vm_packet3_cp_dma_check(ib, idx);
4678                 if (r)
4679                         return r;
4680                 break;
4681         default:
4682                 DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4683                 return -EINVAL;
4684         }
4685         return 0;
4686 }
4687
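/**
 * si_ib_parse - validate an indirect buffer for a VM submission
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to validate
 *
 * Walk the packets in the IB, reject type 0 packets and any
 * type 3 packet that is not allowed on the target ring (SI).
 * Returns 0 on success, -EINVAL on a bad packet.
 */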
4688 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4689 {
4690         int ret = 0;
4691         u32 idx = 0, i;
4692         struct radeon_cs_packet pkt;
4693
4694         do {
4695                 pkt.idx = idx;
4696                 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4697                 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4698                 pkt.one_reg_wr = 0;
4699                 switch (pkt.type) {
4700                 case RADEON_PACKET_TYPE0:
4701                         dev_err(rdev->dev, "Packet0 not allowed!\n");
4702                         for (i = 0; i < ib->length_dw; i++) {
4703                                 if (i == idx)
4704                                         printk("\t0x%08x <---\n", ib->ptr[i]);
4705                                 else
4706                                         printk("\t0x%08x\n", ib->ptr[i]);
4707                         }
4708                         ret = -EINVAL;
4709                         break;
4710                 case RADEON_PACKET_TYPE2:
4711                         idx += 1;
4712                         break;
4713                 case RADEON_PACKET_TYPE3:
4714                         pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4715                         if (ib->is_const_ib)
4716                                 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4717                         else {
4718                                 switch (ib->ring) {
4719                                 case RADEON_RING_TYPE_GFX_INDEX:
4720                                         ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4721                                         break;
4722                                 case CAYMAN_RING_TYPE_CP1_INDEX:
4723                                 case CAYMAN_RING_TYPE_CP2_INDEX:
4724                                         ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4725                                         break;
4726                                 default:
4727                                         dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4728                                         ret = -EINVAL;
4729                                         break;
4730                                 }
4731                         }
4732                         idx += pkt.count + 2;
4733                         break;
4734                 default:
4735                         dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4736                         ret = -EINVAL;
4737                         break;
4738                 }
4739                 if (ret)
4740                         break;
4741         } while (idx < ib->length_dw);
4742
4743         return ret;
4744 }
4745
4746 /*
4747  * vm
4748  */
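/**
 * si_vm_init - initialize the vm manager state
 *
 * @rdev: radeon_device pointer
 *
 * Set the number of VMs (16) and the vram base offset (0) (SI).
 * Returns 0 for success.
 */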
4749 int si_vm_init(struct radeon_device *rdev)
4750 {
4751         /* number of VMs */
4752         rdev->vm_manager.nvm = 16;
4753         /* base offset of vram pages */
4754         rdev->vm_manager.vram_base_offset = 0;
4755
4756         return 0;
4757 }
4758
4759 void si_vm_fini(struct radeon_device *rdev)
4760 {
4761 }
4762
4763 /**
4764  * si_vm_decode_fault - print human readable fault info
4765  *
4766  * @rdev: radeon_device pointer
4767  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4768  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4769  *
4770  * Print human readable fault information (SI).
4771  */
4772 static void si_vm_decode_fault(struct radeon_device *rdev,
4773                                u32 status, u32 addr)
4774 {
4775         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4776         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4777         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4778         char *block;
4779
4780         if (rdev->family == CHIP_TAHITI) {
4781                 switch (mc_id) {
4782                 case 160:
4783                 case 144:
4784                 case 96:
4785                 case 80:
4786                 case 224:
4787                 case 208:
4788                 case 32:
4789                 case 16:
4790                         block = "CB";
4791                         break;
4792                 case 161:
4793                 case 145:
4794                 case 97:
4795                 case 81:
4796                 case 225:
4797                 case 209:
4798                 case 33:
4799                 case 17:
4800                         block = "CB_FMASK";
4801                         break;
4802                 case 162:
4803                 case 146:
4804                 case 98:
4805                 case 82:
4806                 case 226:
4807                 case 210:
4808                 case 34:
4809                 case 18:
4810                         block = "CB_CMASK";
4811                         break;
4812                 case 163:
4813                 case 147:
4814                 case 99:
4815                 case 83:
4816                 case 227:
4817                 case 211:
4818                 case 35:
4819                 case 19:
4820                         block = "CB_IMMED";
4821                         break;
4822                 case 164:
4823                 case 148:
4824                 case 100:
4825                 case 84:
4826                 case 228:
4827                 case 212:
4828                 case 36:
4829                 case 20:
4830                         block = "DB";
4831                         break;
4832                 case 165:
4833                 case 149:
4834                 case 101:
4835                 case 85:
4836                 case 229:
4837                 case 213:
4838                 case 37:
4839                 case 21:
4840                         block = "DB_HTILE";
4841                         break;
4842                 case 167:
4843                 case 151:
4844                 case 103:
4845                 case 87:
4846                 case 231:
4847                 case 215:
4848                 case 39:
4849                 case 23:
4850                         block = "DB_STEN";
4851                         break;
4852                 case 72:
4853                 case 68:
4854                 case 64:
4855                 case 8:
4856                 case 4:
4857                 case 0:
4858                 case 136:
4859                 case 132:
4860                 case 128:
4861                 case 200:
4862                 case 196:
4863                 case 192:
4864                         block = "TC";
4865                         break;
4866                 case 112:
4867                 case 48:
4868                         block = "CP";
4869                         break;
4870                 case 49:
4871                 case 177:
4872                 case 50:
4873                 case 178:
4874                         block = "SH";
4875                         break;
4876                 case 53:
4877                 case 190:
4878                         block = "VGT";
4879                         break;
4880                 case 117:
4881                         block = "IH";
4882                         break;
4883                 case 51:
4884                 case 115:
4885                         block = "RLC";
4886                         break;
4887                 case 119:
4888                 case 183:
4889                         block = "DMA0";
4890                         break;
4891                 case 61:
4892                         block = "DMA1";
4893                         break;
4894                 case 248:
4895                 case 120:
4896                         block = "HDP";
4897                         break;
4898                 default:
4899                         block = "unknown";
4900                         break;
4901                 }
4902         } else {
4903                 switch (mc_id) {
4904                 case 32:
4905                 case 16:
4906                 case 96:
4907                 case 80:
4908                 case 160:
4909                 case 144:
4910                 case 224:
4911                 case 208:
4912                         block = "CB";
4913                         break;
4914                 case 33:
4915                 case 17:
4916                 case 97:
4917                 case 81:
4918                 case 161:
4919                 case 145:
4920                 case 225:
4921                 case 209:
4922                         block = "CB_FMASK";
4923                         break;
4924                 case 34:
4925                 case 18:
4926                 case 98:
4927                 case 82:
4928                 case 162:
4929                 case 146:
4930                 case 226:
4931                 case 210:
4932                         block = "CB_CMASK";
4933                         break;
4934                 case 35:
4935                 case 19:
4936                 case 99:
4937                 case 83:
4938                 case 163:
4939                 case 147:
4940                 case 227:
4941                 case 211:
4942                         block = "CB_IMMED";
4943                         break;
4944                 case 36:
4945                 case 20:
4946                 case 100:
4947                 case 84:
4948                 case 164:
4949                 case 148:
4950                 case 228:
4951                 case 212:
4952                         block = "DB";
4953                         break;
4954                 case 37:
4955                 case 21:
4956                 case 101:
4957                 case 85:
4958                 case 165:
4959                 case 149:
4960                 case 229:
4961                 case 213:
4962                         block = "DB_HTILE";
4963                         break;
4964                 case 39:
4965                 case 23:
4966                 case 103:
4967                 case 87:
4968                 case 167:
4969                 case 151:
4970                 case 231:
4971                 case 215:
4972                         block = "DB_STEN";
4973                         break;
4974                 case 72:
4975                 case 68:
4976                 case 8:
4977                 case 4:
4978                 case 136:
4979                 case 132:
4980                 case 200:
4981                 case 196:
4982                         block = "TC";
4983                         break;
4984                 case 112:
4985                 case 48:
4986                         block = "CP";
4987                         break;
4988                 case 49:
4989                 case 177:
4990                 case 50:
4991                 case 178:
4992                         block = "SH";
4993                         break;
4994                 case 53:
4995                         block = "VGT";
4996                         break;
4997                 case 117:
4998                         block = "IH";
4999                         break;
5000                 case 51:
5001                 case 115:
5002                         block = "RLC";
5003                         break;
5004                 case 119:
5005                 case 183:
5006                         block = "DMA0";
5007                         break;
5008                 case 61:
5009                         block = "DMA1";
5010                         break;
5011                 case 248:
5012                 case 120:
5013                         block = "HDP";
5014                         break;
5015                 default:
5016                         block = "unknown";
5017                         break;
5018                 }
5019         }
5020
5021         printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5022                protections, vmid, addr,
5023                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5024                block, mc_id);
5025 }
5026
5027 void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5028                  unsigned vm_id, uint64_t pd_addr)
5029 {
5030         /* write new base address */
5031         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5032         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5033                                  WRITE_DATA_DST_SEL(0)));
5034
5035         if (vm_id < 8) {
5036                 radeon_ring_write(ring,
5037                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5038         } else {
5039                 radeon_ring_write(ring,
5040                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5041         }
5042         radeon_ring_write(ring, 0);
5043         radeon_ring_write(ring, pd_addr >> 12);
5044
5045         /* flush hdp cache */
5046         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5047         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5048                                  WRITE_DATA_DST_SEL(0)));
5049         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5050         radeon_ring_write(ring, 0);
5051         radeon_ring_write(ring, 0x1);
5052
5053         /* bits 0-15 are the VM contexts 0-15 */
5054         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5055         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5056                                  WRITE_DATA_DST_SEL(0)));
5057         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5058         radeon_ring_write(ring, 0);
5059         radeon_ring_write(ring, 1 << vm_id);
5060
5061         /* wait for the invalidate to complete */
5062         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5063         radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
5064                                  WAIT_REG_MEM_ENGINE(0))); /* me */
5065         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5066         radeon_ring_write(ring, 0);
5067         radeon_ring_write(ring, 0); /* ref */
5068         radeon_ring_write(ring, 0); /* mask */
5069         radeon_ring_write(ring, 0x20); /* poll interval */
5070
5071         /* sync PFP to ME, otherwise we might get invalid PFP reads */
5072         radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5073         radeon_ring_write(ring, 0x0);
5074 }
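/*
 * Note on the sequence above (a summary, not authoritative documentation):
 * si_vm_flush() emits five packets on the GFX ring - a WRITE_DATA updating the
 * per-VM page table base register (VM_CONTEXT0..7 vs. VM_CONTEXT8..15 depending
 * on vm_id), a WRITE_DATA poking HDP_MEM_COHERENCY_FLUSH_CNTL to flush the HDP
 * cache, a WRITE_DATA to VM_INVALIDATE_REQUEST with bit vm_id set, a
 * WAIT_REG_MEM poll for the invalidate to complete, and finally a PFP_SYNC_ME
 * so the prefetch parser does not run ahead with stale translations.  The
 * caller is assumed to have already reserved enough ring space for these
 * writes.
 */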
5075
5076 /*
5077  *  Power and clock gating
5078  */
5079 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5080 {
5081         int i;
5082
5083         for (i = 0; i < rdev->usec_timeout; i++) {
5084                 if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5085                         break;
5086                 udelay(1);
5087         }
5088
5089         for (i = 0; i < rdev->usec_timeout; i++) {
5090                 if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5091                         break;
5092                 udelay(1);
5093         }
5094 }
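/*
 * The helper above simply busy-waits, up to rdev->usec_timeout microseconds
 * per register, for both RLC_SERDES_MASTER_BUSY_0/1 to read back zero.  It
 * returns void, so a timeout is silent and the callers below just proceed.
 */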
5095
5096 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5097                                          bool enable)
5098 {
5099         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5100         u32 mask;
5101         int i;
5102
5103         if (enable)
5104                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5105         else
5106                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5107         WREG32(CP_INT_CNTL_RING0, tmp);
5108
5109         if (!enable) {
5110                 /* read a gfx register */
5111                 tmp = RREG32(DB_DEPTH_INFO);
5112
5113                 mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5114                 for (i = 0; i < rdev->usec_timeout; i++) {
5115                         if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5116                                 break;
5117                         udelay(1);
5118                 }
5119         }
5120 }
5121
5122 static void si_set_uvd_dcm(struct radeon_device *rdev,
5123                            bool sw_mode)
5124 {
5125         u32 tmp, tmp2;
5126
5127         tmp = RREG32(UVD_CGC_CTRL);
5128         tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5129         tmp |= DCM | CG_DT(1) | CLK_OD(4);
5130
5131         if (sw_mode) {
5132                 tmp &= ~0x7ffff800;
5133                 tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5134         } else {
5135                 tmp |= 0x7ffff800;
5136                 tmp2 = 0;
5137         }
5138
5139         WREG32(UVD_CGC_CTRL, tmp);
5140         WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5141 }
5142
5143 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5144 {
5145         bool hw_mode = true;
5146
5147         if (hw_mode) {
5148                 si_set_uvd_dcm(rdev, false);
5149         } else {
5150                 u32 tmp = RREG32(UVD_CGC_CTRL);
5151                 tmp &= ~DCM;
5152                 WREG32(UVD_CGC_CTRL, tmp);
5153         }
5154 }
5155
5156 static u32 si_halt_rlc(struct radeon_device *rdev)
5157 {
5158         u32 data, orig;
5159
5160         orig = data = RREG32(RLC_CNTL);
5161
5162         if (data & RLC_ENABLE) {
5163                 data &= ~RLC_ENABLE;
5164                 WREG32(RLC_CNTL, data);
5165
5166                 si_wait_for_rlc_serdes(rdev);
5167         }
5168
5169         return orig;
5170 }
5171
5172 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5173 {
5174         u32 tmp;
5175
5176         tmp = RREG32(RLC_CNTL);
5177         if (tmp != rlc)
5178                 WREG32(RLC_CNTL, rlc);
5179 }
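/*
 * si_halt_rlc()/si_update_rlc() are used as a save/restore pair: halt returns
 * the previous RLC_CNTL value, the caller reprograms the SERDES write masks,
 * and update writes the saved value back only if it changed.  A typical use,
 * as in si_enable_cgcg()/si_enable_mgcg() below:
 *
 *      tmp = si_halt_rlc(rdev);
 *      WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
 *      WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
 *      WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
 *      si_update_rlc(rdev, tmp);
 */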
5180
5181 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5182 {
5183         u32 data, orig;
5184
5185         orig = data = RREG32(DMA_PG);
5186         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5187                 data |= PG_CNTL_ENABLE;
5188         else
5189                 data &= ~PG_CNTL_ENABLE;
5190         if (orig != data)
5191                 WREG32(DMA_PG, data);
5192 }
5193
5194 static void si_init_dma_pg(struct radeon_device *rdev)
5195 {
5196         u32 tmp;
5197
5198         WREG32(DMA_PGFSM_WRITE,  0x00002000);
5199         WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5200
5201         for (tmp = 0; tmp < 5; tmp++)
5202                 WREG32(DMA_PGFSM_WRITE, 0);
5203 }
5204
5205 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5206                                bool enable)
5207 {
5208         u32 tmp;
5209
5210         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5211                 tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5212                 WREG32(RLC_TTOP_D, tmp);
5213
5214                 tmp = RREG32(RLC_PG_CNTL);
5215                 tmp |= GFX_PG_ENABLE;
5216                 WREG32(RLC_PG_CNTL, tmp);
5217
5218                 tmp = RREG32(RLC_AUTO_PG_CTRL);
5219                 tmp |= AUTO_PG_EN;
5220                 WREG32(RLC_AUTO_PG_CTRL, tmp);
5221         } else {
5222                 tmp = RREG32(RLC_AUTO_PG_CTRL);
5223                 tmp &= ~AUTO_PG_EN;
5224                 WREG32(RLC_AUTO_PG_CTRL, tmp);
5225
5226                 tmp = RREG32(DB_RENDER_CONTROL);
5227         }
5228 }
5229
5230 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5231 {
5232         u32 tmp;
5233
5234         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5235
5236         tmp = RREG32(RLC_PG_CNTL);
5237         tmp |= GFX_PG_SRC;
5238         WREG32(RLC_PG_CNTL, tmp);
5239
5240         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5241
5242         tmp = RREG32(RLC_AUTO_PG_CTRL);
5243
5244         tmp &= ~GRBM_REG_SGIT_MASK;
5245         tmp |= GRBM_REG_SGIT(0x700);
5246         tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5247         WREG32(RLC_AUTO_PG_CTRL, tmp);
5248 }
5249
5250 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5251 {
5252         u32 mask = 0, tmp, tmp1;
5253         int i;
5254
5255         si_select_se_sh(rdev, se, sh);
5256         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5257         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5258         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5259
5260         tmp &= 0xffff0000;
5261
5262         tmp |= tmp1;
5263         tmp >>= 16;
5264
5265         for (i = 0; i < rdev->config.si.max_cu_per_sh; i++) {
5266                 mask <<= 1;
5267                 mask |= 1;
5268         }
5269
5270         return (~tmp) & mask;
5271 }
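/*
 * The value returned above is a bitmap of active CUs for the selected SE/SH:
 * the fixed-function and user harvest registers are combined, shifted down to
 * the per-CU bits, and inverted under a max_cu_per_sh-wide mask.  For example
 * (hypothetical values), with max_cu_per_sh = 5 and combined inactive bits
 * 0b00100, the function returns 0b11011, i.e. CUs 0, 1, 3 and 4 are active.
 */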
5272
5273 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5274 {
5275         u32 i, j, k, active_cu_number = 0;
5276         u32 mask, counter, cu_bitmap;
5277         u32 tmp = 0;
5278
5279         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5280                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5281                         mask = 1;
5282                         cu_bitmap = 0;
5283                         counter  = 0;
5284                         for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5285                                 if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5286                                         if (counter < 2)
5287                                                 cu_bitmap |= mask;
5288                                         counter++;
5289                                 }
5290                                 mask <<= 1;
5291                         }
5292
5293                         active_cu_number += counter;
5294                         tmp |= (cu_bitmap << (i * 16 + j * 8));
5295                 }
5296         }
5297
5298         WREG32(RLC_PG_AO_CU_MASK, tmp);
5299
5300         tmp = RREG32(RLC_MAX_PG_CU);
5301         tmp &= ~MAX_PU_CU_MASK;
5302         tmp |= MAX_PU_CU(active_cu_number);
5303         WREG32(RLC_MAX_PG_CU, tmp);
5304 }
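/*
 * si_init_ao_cu_mask() walks every SE/SH, marks at most the first two active
 * CUs per SH as "always on" in RLC_PG_AO_CU_MASK, and programs the total
 * active CU count into the MAX_PU_CU field of RLC_MAX_PG_CU, presumably so
 * the RLC knows how many CUs it may power gate.
 */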
5305
5306 static void si_enable_cgcg(struct radeon_device *rdev,
5307                            bool enable)
5308 {
5309         u32 data, orig, tmp;
5310
5311         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5312
5313         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5314                 si_enable_gui_idle_interrupt(rdev, true);
5315
5316                 WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5317
5318                 tmp = si_halt_rlc(rdev);
5319
5320                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5321                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5322                 WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5323
5324                 si_wait_for_rlc_serdes(rdev);
5325
5326                 si_update_rlc(rdev, tmp);
5327
5328                 WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5329
5330                 data |= CGCG_EN | CGLS_EN;
5331         } else {
5332                 si_enable_gui_idle_interrupt(rdev, false);
5333
5334                 RREG32(CB_CGTT_SCLK_CTRL);
5335                 RREG32(CB_CGTT_SCLK_CTRL);
5336                 RREG32(CB_CGTT_SCLK_CTRL);
5337                 RREG32(CB_CGTT_SCLK_CTRL);
5338
5339                 data &= ~(CGCG_EN | CGLS_EN);
5340         }
5341
5342         if (orig != data)
5343                 WREG32(RLC_CGCG_CGLS_CTRL, data);
5344 }
5345
5346 static void si_enable_mgcg(struct radeon_device *rdev,
5347                            bool enable)
5348 {
5349         u32 data, orig, tmp = 0;
5350
5351         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5352                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5353                 data = 0x96940200;
5354                 if (orig != data)
5355                         WREG32(CGTS_SM_CTRL_REG, data);
5356
5357                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5358                         orig = data = RREG32(CP_MEM_SLP_CNTL);
5359                         data |= CP_MEM_LS_EN;
5360                         if (orig != data)
5361                                 WREG32(CP_MEM_SLP_CNTL, data);
5362                 }
5363
5364                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5365                 data &= 0xffffffc0;
5366                 if (orig != data)
5367                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5368
5369                 tmp = si_halt_rlc(rdev);
5370
5371                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5372                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5373                 WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5374
5375                 si_update_rlc(rdev, tmp);
5376         } else {
5377                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5378                 data |= 0x00000003;
5379                 if (orig != data)
5380                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5381
5382                 data = RREG32(CP_MEM_SLP_CNTL);
5383                 if (data & CP_MEM_LS_EN) {
5384                         data &= ~CP_MEM_LS_EN;
5385                         WREG32(CP_MEM_SLP_CNTL, data);
5386                 }
5387                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5388                 data |= LS_OVERRIDE | OVERRIDE;
5389                 if (orig != data)
5390                         WREG32(CGTS_SM_CTRL_REG, data);
5391
5392                 tmp = si_halt_rlc(rdev);
5393
5394                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5395                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5396                 WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5397
5398                 si_update_rlc(rdev, tmp);
5399         }
5400 }
5401
5402 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5403                                bool enable)
5404 {
5405         u32 orig, data, tmp;
5406
5407         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5408                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5409                 tmp |= 0x3fff;
5410                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5411
5412                 orig = data = RREG32(UVD_CGC_CTRL);
5413                 data |= DCM;
5414                 if (orig != data)
5415                         WREG32(UVD_CGC_CTRL, data);
5416
5417                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5418                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5419         } else {
5420                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5421                 tmp &= ~0x3fff;
5422                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5423
5424                 orig = data = RREG32(UVD_CGC_CTRL);
5425                 data &= ~DCM;
5426                 if (orig != data)
5427                         WREG32(UVD_CGC_CTRL, data);
5428
5429                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5430                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5431         }
5432 }
5433
5434 static const u32 mc_cg_registers[] =
5435 {
5436         MC_HUB_MISC_HUB_CG,
5437         MC_HUB_MISC_SIP_CG,
5438         MC_HUB_MISC_VM_CG,
5439         MC_XPB_CLK_GAT,
5440         ATC_MISC_CG,
5441         MC_CITF_MISC_WR_CG,
5442         MC_CITF_MISC_RD_CG,
5443         MC_CITF_MISC_VM_CG,
5444         VM_L2_CG,
5445 };
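/*
 * Each register in mc_cg_registers[] carries both an MC_LS_ENABLE and an
 * MC_CG_ENABLE bit; si_enable_mc_ls() and si_enable_mc_mgcg() below simply
 * walk this table and toggle the respective bit, writing a register back only
 * when its value actually changes.
 */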
5446
5447 static void si_enable_mc_ls(struct radeon_device *rdev,
5448                             bool enable)
5449 {
5450         int i;
5451         u32 orig, data;
5452
5453         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5454                 orig = data = RREG32(mc_cg_registers[i]);
5455                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5456                         data |= MC_LS_ENABLE;
5457                 else
5458                         data &= ~MC_LS_ENABLE;
5459                 if (data != orig)
5460                         WREG32(mc_cg_registers[i], data);
5461         }
5462 }
5463
5464 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5465                                bool enable)
5466 {
5467         int i;
5468         u32 orig, data;
5469
5470         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5471                 orig = data = RREG32(mc_cg_registers[i]);
5472                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5473                         data |= MC_CG_ENABLE;
5474                 else
5475                         data &= ~MC_CG_ENABLE;
5476                 if (data != orig)
5477                         WREG32(mc_cg_registers[i], data);
5478         }
5479 }
5480
5481 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5482                                bool enable)
5483 {
5484         u32 orig, data, offset;
5485         int i;
5486
5487         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5488                 for (i = 0; i < 2; i++) {
5489                         if (i == 0)
5490                                 offset = DMA0_REGISTER_OFFSET;
5491                         else
5492                                 offset = DMA1_REGISTER_OFFSET;
5493                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5494                         data &= ~MEM_POWER_OVERRIDE;
5495                         if (data != orig)
5496                                 WREG32(DMA_POWER_CNTL + offset, data);
5497                         WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5498                 }
5499         } else {
5500                 for (i = 0; i < 2; i++) {
5501                         if (i == 0)
5502                                 offset = DMA0_REGISTER_OFFSET;
5503                         else
5504                                 offset = DMA1_REGISTER_OFFSET;
5505                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5506                         data |= MEM_POWER_OVERRIDE;
5507                         if (data != orig)
5508                                 WREG32(DMA_POWER_CNTL + offset, data);
5509
5510                         orig = data = RREG32(DMA_CLK_CTRL + offset);
5511                         data = 0xff000000;
5512                         if (data != orig)
5513                                 WREG32(DMA_CLK_CTRL + offset, data);
5514                 }
5515         }
5516 }
5517
5518 static void si_enable_bif_mgls(struct radeon_device *rdev,
5519                                bool enable)
5520 {
5521         u32 orig, data;
5522
5523         orig = data = RREG32_PCIE(PCIE_CNTL2);
5524
5525         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5526                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5527                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5528         else
5529                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5530                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5531
5532         if (orig != data)
5533                 WREG32_PCIE(PCIE_CNTL2, data);
5534 }
5535
5536 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5537                                bool enable)
5538 {
5539         u32 orig, data;
5540
5541         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5542
5543         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5544                 data &= ~CLOCK_GATING_DIS;
5545         else
5546                 data |= CLOCK_GATING_DIS;
5547
5548         if (orig != data)
5549                 WREG32(HDP_HOST_PATH_CNTL, data);
5550 }
5551
5552 static void si_enable_hdp_ls(struct radeon_device *rdev,
5553                              bool enable)
5554 {
5555         u32 orig, data;
5556
5557         orig = data = RREG32(HDP_MEM_POWER_LS);
5558
5559         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5560                 data |= HDP_LS_ENABLE;
5561         else
5562                 data &= ~HDP_LS_ENABLE;
5563
5564         if (orig != data)
5565                 WREG32(HDP_MEM_POWER_LS, data);
5566 }
5567
5568 static void si_update_cg(struct radeon_device *rdev,
5569                          u32 block, bool enable)
5570 {
5571         if (block & RADEON_CG_BLOCK_GFX) {
5572                 si_enable_gui_idle_interrupt(rdev, false);
5573                 /* order matters! */
5574                 if (enable) {
5575                         si_enable_mgcg(rdev, true);
5576                         si_enable_cgcg(rdev, true);
5577                 } else {
5578                         si_enable_cgcg(rdev, false);
5579                         si_enable_mgcg(rdev, false);
5580                 }
5581                 si_enable_gui_idle_interrupt(rdev, true);
5582         }
5583
5584         if (block & RADEON_CG_BLOCK_MC) {
5585                 si_enable_mc_mgcg(rdev, enable);
5586                 si_enable_mc_ls(rdev, enable);
5587         }
5588
5589         if (block & RADEON_CG_BLOCK_SDMA) {
5590                 si_enable_dma_mgcg(rdev, enable);
5591         }
5592
5593         if (block & RADEON_CG_BLOCK_BIF) {
5594                 si_enable_bif_mgls(rdev, enable);
5595         }
5596
5597         if (block & RADEON_CG_BLOCK_UVD) {
5598                 if (rdev->has_uvd) {
5599                         si_enable_uvd_mgcg(rdev, enable);
5600                 }
5601         }
5602
5603         if (block & RADEON_CG_BLOCK_HDP) {
5604                 si_enable_hdp_mgcg(rdev, enable);
5605                 si_enable_hdp_ls(rdev, enable);
5606         }
5607 }
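/*
 * Ordering note for si_update_cg(): for the GFX block, MGCG is enabled before
 * CGCG and disabled after it (hence the "order matters!" comment above), and
 * the GUI idle interrupt is turned off for the duration of the transition.
 */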
5608
5609 static void si_init_cg(struct radeon_device *rdev)
5610 {
5611         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5612                             RADEON_CG_BLOCK_MC |
5613                             RADEON_CG_BLOCK_SDMA |
5614                             RADEON_CG_BLOCK_BIF |
5615                             RADEON_CG_BLOCK_HDP), true);
5616         if (rdev->has_uvd) {
5617                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5618                 si_init_uvd_internal_cg(rdev);
5619         }
5620 }
5621
5622 static void si_fini_cg(struct radeon_device *rdev)
5623 {
5624         if (rdev->has_uvd) {
5625                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5626         }
5627         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5628                             RADEON_CG_BLOCK_MC |
5629                             RADEON_CG_BLOCK_SDMA |
5630                             RADEON_CG_BLOCK_BIF |
5631                             RADEON_CG_BLOCK_HDP), false);
5632 }
5633
5634 u32 si_get_csb_size(struct radeon_device *rdev)
5635 {
5636         u32 count = 0;
5637         const struct cs_section_def *sect = NULL;
5638         const struct cs_extent_def *ext = NULL;
5639
5640         if (rdev->rlc.cs_data == NULL)
5641                 return 0;
5642
5643         /* begin clear state */
5644         count += 2;
5645         /* context control state */
5646         count += 3;
5647
5648         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5649                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5650                         if (sect->id == SECT_CONTEXT)
5651                                 count += 2 + ext->reg_count;
5652                         else
5653                                 return 0;
5654                 }
5655         }
5656         /* pa_sc_raster_config */
5657         count += 3;
5658         /* end clear state */
5659         count += 2;
5660         /* clear state */
5661         count += 2;
5662
5663         return count;
5664 }
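/*
 * The clear-state buffer size computed above is, in dwords:
 *
 *      2 (begin clear state) + 3 (context control)
 *      + sum over SECT_CONTEXT extents of (2 + reg_count)
 *      + 3 (pa_sc_raster_config) + 2 (end clear state) + 2 (clear state)
 *
 * which must match the layout emitted by si_get_csb_buffer() below.
 */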
5665
5666 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5667 {
5668         u32 count = 0, i;
5669         const struct cs_section_def *sect = NULL;
5670         const struct cs_extent_def *ext = NULL;
5671
5672         if (rdev->rlc.cs_data == NULL)
5673                 return;
5674         if (buffer == NULL)
5675                 return;
5676
5677         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5678         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5679
5680         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5681         buffer[count++] = cpu_to_le32(0x80000000);
5682         buffer[count++] = cpu_to_le32(0x80000000);
5683
5684         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5685                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5686                         if (sect->id == SECT_CONTEXT) {
5687                                 buffer[count++] =
5688                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5689                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5690                                 for (i = 0; i < ext->reg_count; i++)
5691                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
5692                         } else {
5693                                 return;
5694                         }
5695                 }
5696         }
5697
5698         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5699         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5700         switch (rdev->family) {
5701         case CHIP_TAHITI:
5702         case CHIP_PITCAIRN:
5703                 buffer[count++] = cpu_to_le32(0x2a00126a);
5704                 break;
5705         case CHIP_VERDE:
5706                 buffer[count++] = cpu_to_le32(0x0000124a);
5707                 break;
5708         case CHIP_OLAND:
5709                 buffer[count++] = cpu_to_le32(0x00000082);
5710                 break;
5711         case CHIP_HAINAN:
5712                 buffer[count++] = cpu_to_le32(0x00000000);
5713                 break;
5714         default:
5715                 buffer[count++] = cpu_to_le32(0x00000000);
5716                 break;
5717         }
5718
5719         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5720         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5721
5722         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5723         buffer[count++] = cpu_to_le32(0);
5724 }
5725
5726 static void si_init_pg(struct radeon_device *rdev)
5727 {
5728         if (rdev->pg_flags) {
5729                 if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5730                         si_init_dma_pg(rdev);
5731                 }
5732                 si_init_ao_cu_mask(rdev);
5733                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5734                         si_init_gfx_cgpg(rdev);
5735                 } else {
5736                         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5737                         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5738                 }
5739                 si_enable_dma_pg(rdev, true);
5740                 si_enable_gfx_cgpg(rdev, true);
5741         } else {
5742                 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5743                 WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5744         }
5745 }
5746
5747 static void si_fini_pg(struct radeon_device *rdev)
5748 {
5749         if (rdev->pg_flags) {
5750                 si_enable_dma_pg(rdev, false);
5751                 si_enable_gfx_cgpg(rdev, false);
5752         }
5753 }
5754
5755 /*
5756  * RLC
5757  */
5758 void si_rlc_reset(struct radeon_device *rdev)
5759 {
5760         u32 tmp = RREG32(GRBM_SOFT_RESET);
5761
5762         tmp |= SOFT_RESET_RLC;
5763         WREG32(GRBM_SOFT_RESET, tmp);
5764         udelay(50);
5765         tmp &= ~SOFT_RESET_RLC;
5766         WREG32(GRBM_SOFT_RESET, tmp);
5767         udelay(50);
5768 }
5769
5770 static void si_rlc_stop(struct radeon_device *rdev)
5771 {
5772         WREG32(RLC_CNTL, 0);
5773
5774         si_enable_gui_idle_interrupt(rdev, false);
5775
5776         si_wait_for_rlc_serdes(rdev);
5777 }
5778
5779 static void si_rlc_start(struct radeon_device *rdev)
5780 {
5781         WREG32(RLC_CNTL, RLC_ENABLE);
5782
5783         si_enable_gui_idle_interrupt(rdev, true);
5784
5785         udelay(50);
5786 }
5787
5788 static bool si_lbpw_supported(struct radeon_device *rdev)
5789 {
5790         u32 tmp;
5791
5792         /* Enable LBPW only for DDR3 */
5793         tmp = RREG32(MC_SEQ_MISC0);
5794         if ((tmp & 0xF0000000) == 0xB0000000)
5795                 return true;
5796         return false;
5797 }
5798
5799 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5800 {
5801         u32 tmp;
5802
5803         tmp = RREG32(RLC_LB_CNTL);
5804         if (enable)
5805                 tmp |= LOAD_BALANCE_ENABLE;
5806         else
5807                 tmp &= ~LOAD_BALANCE_ENABLE;
5808         WREG32(RLC_LB_CNTL, tmp);
5809
5810         if (!enable) {
5811                 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5812                 WREG32(SPI_LB_CU_MASK, 0x00ff);
5813         }
5814 }
5815
5816 static int si_rlc_resume(struct radeon_device *rdev)
5817 {
5818         u32 i;
5819
5820         if (!rdev->rlc_fw)
5821                 return -EINVAL;
5822
5823         si_rlc_stop(rdev);
5824
5825         si_rlc_reset(rdev);
5826
5827         si_init_pg(rdev);
5828
5829         si_init_cg(rdev);
5830
5831         WREG32(RLC_RL_BASE, 0);
5832         WREG32(RLC_RL_SIZE, 0);
5833         WREG32(RLC_LB_CNTL, 0);
5834         WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5835         WREG32(RLC_LB_CNTR_INIT, 0);
5836         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5837
5838         WREG32(RLC_MC_CNTL, 0);
5839         WREG32(RLC_UCODE_CNTL, 0);
5840
5841         if (rdev->new_fw) {
5842                 const struct rlc_firmware_header_v1_0 *hdr =
5843                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5844                 u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5845                 const __le32 *fw_data = (const __le32 *)
5846                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5847
5848                 radeon_ucode_print_rlc_hdr(&hdr->header);
5849
5850                 for (i = 0; i < fw_size; i++) {
5851                         WREG32(RLC_UCODE_ADDR, i);
5852                         WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
5853                 }
5854         } else {
5855                 const __be32 *fw_data =
5856                         (const __be32 *)rdev->rlc_fw->data;
5857                 for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5858                         WREG32(RLC_UCODE_ADDR, i);
5859                         WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5860                 }
5861         }
5862         WREG32(RLC_UCODE_ADDR, 0);
5863
5864         si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5865
5866         si_rlc_start(rdev);
5867
5868         return 0;
5869 }
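/*
 * RLC bring-up order used above: stop the RLC, soft-reset it, initialize
 * power gating and clock gating state, clear the RLC bookkeeping registers,
 * upload the microcode (either the new header-described firmware, read as
 * little-endian words, or the legacy big-endian blob of SI_RLC_UCODE_SIZE
 * words), enable LBPW only when DDR3 memory is detected (see
 * si_lbpw_supported()), and finally start the RLC.
 */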
5870
5871 static void si_enable_interrupts(struct radeon_device *rdev)
5872 {
5873         u32 ih_cntl = RREG32(IH_CNTL);
5874         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5875
5876         ih_cntl |= ENABLE_INTR;
5877         ih_rb_cntl |= IH_RB_ENABLE;
5878         WREG32(IH_CNTL, ih_cntl);
5879         WREG32(IH_RB_CNTL, ih_rb_cntl);
5880         rdev->ih.enabled = true;
5881 }
5882
5883 static void si_disable_interrupts(struct radeon_device *rdev)
5884 {
5885         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5886         u32 ih_cntl = RREG32(IH_CNTL);
5887
5888         ih_rb_cntl &= ~IH_RB_ENABLE;
5889         ih_cntl &= ~ENABLE_INTR;
5890         WREG32(IH_RB_CNTL, ih_rb_cntl);
5891         WREG32(IH_CNTL, ih_cntl);
5892         /* set rptr, wptr to 0 */
5893         WREG32(IH_RB_RPTR, 0);
5894         WREG32(IH_RB_WPTR, 0);
5895         rdev->ih.enabled = false;
5896         rdev->ih.rptr = 0;
5897 }
5898
5899 static void si_disable_interrupt_state(struct radeon_device *rdev)
5900 {
5901         u32 tmp;
5902
5903         tmp = RREG32(CP_INT_CNTL_RING0) &
5904                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5905         WREG32(CP_INT_CNTL_RING0, tmp);
5906         WREG32(CP_INT_CNTL_RING1, 0);
5907         WREG32(CP_INT_CNTL_RING2, 0);
5908         tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5909         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5910         tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5911         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5912         WREG32(GRBM_INT_CNTL, 0);
5913         if (rdev->num_crtc >= 2) {
5914                 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5915                 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5916         }
5917         if (rdev->num_crtc >= 4) {
5918                 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5919                 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5920         }
5921         if (rdev->num_crtc >= 6) {
5922                 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5923                 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5924         }
5925
5926         if (rdev->num_crtc >= 2) {
5927                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5928                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5929         }
5930         if (rdev->num_crtc >= 4) {
5931                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5932                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5933         }
5934         if (rdev->num_crtc >= 6) {
5935                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5936                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5937         }
5938
5939         if (!ASIC_IS_NODCE(rdev)) {
5940                 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5941
5942                 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5943                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5944                 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5945                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5946                 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5947                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5948                 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5949                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5950                 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5951                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5952                 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5953                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5954         }
5955 }
5956
5957 static int si_irq_init(struct radeon_device *rdev)
5958 {
5959         int ret = 0;
5960         int rb_bufsz;
5961         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5962
5963         /* allocate ring */
5964         ret = r600_ih_ring_alloc(rdev);
5965         if (ret)
5966                 return ret;
5967
5968         /* disable irqs */
5969         si_disable_interrupts(rdev);
5970
5971         /* init rlc */
5972         ret = si_rlc_resume(rdev);
5973         if (ret) {
5974                 r600_ih_ring_fini(rdev);
5975                 return ret;
5976         }
5977
5978         /* setup interrupt control */
5979         /* set dummy read address to ring address */
5980         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5981         interrupt_cntl = RREG32(INTERRUPT_CNTL);
5982         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5983          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5984          */
5985         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5986         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5987         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5988         WREG32(INTERRUPT_CNTL, interrupt_cntl);
5989
5990         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5991         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
5992
5993         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5994                       IH_WPTR_OVERFLOW_CLEAR |
5995                       (rb_bufsz << 1));
5996
5997         if (rdev->wb.enabled)
5998                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5999
6000         /* set the writeback address whether it's enabled or not */
6001         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6002         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6003
6004         WREG32(IH_RB_CNTL, ih_rb_cntl);
6005
6006         /* set rptr, wptr to 0 */
6007         WREG32(IH_RB_RPTR, 0);
6008         WREG32(IH_RB_WPTR, 0);
6009
6010         /* Default settings for IH_CNTL (disabled at first) */
6011         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6012         /* RPTR_REARM only works if msi's are enabled */
6013         if (rdev->msi_enabled)
6014                 ih_cntl |= RPTR_REARM;
6015         WREG32(IH_CNTL, ih_cntl);
6016
6017         /* force the active interrupt state to all disabled */
6018         si_disable_interrupt_state(rdev);
6019
6020         pci_set_master(rdev->pdev);
6021
6022         /* enable irqs */
6023         si_enable_interrupts(rdev);
6024
6025         return ret;
6026 }
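/*
 * si_irq_init() sets up the interrupt handler ring: allocate the IH ring,
 * make sure interrupts are off, bring up the RLC, point INTERRUPT_CNTL2 and
 * IH_RB_BASE at the ring, size it via order_base_2(), optionally enable
 * write-back of the write pointer, and only then re-enable interrupts with
 * every source masked off (si_disable_interrupt_state()).  The individual
 * sources are unmasked later by si_irq_set().
 */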
6027
6028 int si_irq_set(struct radeon_device *rdev)
6029 {
6030         u32 cp_int_cntl;
6031         u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
6032         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6033         u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
6034         u32 grbm_int_cntl = 0;
6035         u32 dma_cntl, dma_cntl1;
6036         u32 thermal_int = 0;
6037
6038         if (!rdev->irq.installed) {
6039                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6040                 return -EINVAL;
6041         }
6042         /* don't enable anything if the ih is disabled */
6043         if (!rdev->ih.enabled) {
6044                 si_disable_interrupts(rdev);
6045                 /* force the active interrupt state to all disabled */
6046                 si_disable_interrupt_state(rdev);
6047                 return 0;
6048         }
6049
6050         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6051                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6052
6053         if (!ASIC_IS_NODCE(rdev)) {
6054                 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6055                 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6056                 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6057                 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6058                 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6059                 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6060         }
6061
6062         dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6063         dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6064
6065         thermal_int = RREG32(CG_THERMAL_INT) &
6066                 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6067
6068         /* enable CP interrupts on all rings */
6069         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6070                 DRM_DEBUG("si_irq_set: sw int gfx\n");
6071                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6072         }
6073         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6074                 DRM_DEBUG("si_irq_set: sw int cp1\n");
6075                 cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
6076         }
6077         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6078                 DRM_DEBUG("si_irq_set: sw int cp2\n");
6079                 cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
6080         }
6081         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6082                 DRM_DEBUG("si_irq_set: sw int dma\n");
6083                 dma_cntl |= TRAP_ENABLE;
6084         }
6085
6086         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6087                 DRM_DEBUG("si_irq_set: sw int dma1\n");
6088                 dma_cntl1 |= TRAP_ENABLE;
6089         }
6090         if (rdev->irq.crtc_vblank_int[0] ||
6091             atomic_read(&rdev->irq.pflip[0])) {
6092                 DRM_DEBUG("si_irq_set: vblank 0\n");
6093                 crtc1 |= VBLANK_INT_MASK;
6094         }
6095         if (rdev->irq.crtc_vblank_int[1] ||
6096             atomic_read(&rdev->irq.pflip[1])) {
6097                 DRM_DEBUG("si_irq_set: vblank 1\n");
6098                 crtc2 |= VBLANK_INT_MASK;
6099         }
6100         if (rdev->irq.crtc_vblank_int[2] ||
6101             atomic_read(&rdev->irq.pflip[2])) {
6102                 DRM_DEBUG("si_irq_set: vblank 2\n");
6103                 crtc3 |= VBLANK_INT_MASK;
6104         }
6105         if (rdev->irq.crtc_vblank_int[3] ||
6106             atomic_read(&rdev->irq.pflip[3])) {
6107                 DRM_DEBUG("si_irq_set: vblank 3\n");
6108                 crtc4 |= VBLANK_INT_MASK;
6109         }
6110         if (rdev->irq.crtc_vblank_int[4] ||
6111             atomic_read(&rdev->irq.pflip[4])) {
6112                 DRM_DEBUG("si_irq_set: vblank 4\n");
6113                 crtc5 |= VBLANK_INT_MASK;
6114         }
6115         if (rdev->irq.crtc_vblank_int[5] ||
6116             atomic_read(&rdev->irq.pflip[5])) {
6117                 DRM_DEBUG("si_irq_set: vblank 5\n");
6118                 crtc6 |= VBLANK_INT_MASK;
6119         }
6120         if (rdev->irq.hpd[0]) {
6121                 DRM_DEBUG("si_irq_set: hpd 1\n");
6122                 hpd1 |= DC_HPDx_INT_EN;
6123         }
6124         if (rdev->irq.hpd[1]) {
6125                 DRM_DEBUG("si_irq_set: hpd 2\n");
6126                 hpd2 |= DC_HPDx_INT_EN;
6127         }
6128         if (rdev->irq.hpd[2]) {
6129                 DRM_DEBUG("si_irq_set: hpd 3\n");
6130                 hpd3 |= DC_HPDx_INT_EN;
6131         }
6132         if (rdev->irq.hpd[3]) {
6133                 DRM_DEBUG("si_irq_set: hpd 4\n");
6134                 hpd4 |= DC_HPDx_INT_EN;
6135         }
6136         if (rdev->irq.hpd[4]) {
6137                 DRM_DEBUG("si_irq_set: hpd 5\n");
6138                 hpd5 |= DC_HPDx_INT_EN;
6139         }
6140         if (rdev->irq.hpd[5]) {
6141                 DRM_DEBUG("si_irq_set: hpd 6\n");
6142                 hpd6 |= DC_HPDx_INT_EN;
6143         }
6144
6145         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6146         WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
6147         WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
6148
6149         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
6150         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
6151
6152         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6153
6154         if (rdev->irq.dpm_thermal) {
6155                 DRM_DEBUG("dpm thermal\n");
6156                 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6157         }
6158
6159         if (rdev->num_crtc >= 2) {
6160                 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6161                 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6162         }
6163         if (rdev->num_crtc >= 4) {
6164                 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6165                 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6166         }
6167         if (rdev->num_crtc >= 6) {
6168                 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6169                 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6170         }
6171
6172         if (rdev->num_crtc >= 2) {
6173                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
6174                        GRPH_PFLIP_INT_MASK);
6175                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
6176                        GRPH_PFLIP_INT_MASK);
6177         }
6178         if (rdev->num_crtc >= 4) {
6179                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
6180                        GRPH_PFLIP_INT_MASK);
6181                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
6182                        GRPH_PFLIP_INT_MASK);
6183         }
6184         if (rdev->num_crtc >= 6) {
6185                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
6186                        GRPH_PFLIP_INT_MASK);
6187                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
6188                        GRPH_PFLIP_INT_MASK);
6189         }
6190
6191         if (!ASIC_IS_NODCE(rdev)) {
6192                 WREG32(DC_HPD1_INT_CONTROL, hpd1);
6193                 WREG32(DC_HPD2_INT_CONTROL, hpd2);
6194                 WREG32(DC_HPD3_INT_CONTROL, hpd3);
6195                 WREG32(DC_HPD4_INT_CONTROL, hpd4);
6196                 WREG32(DC_HPD5_INT_CONTROL, hpd5);
6197                 WREG32(DC_HPD6_INT_CONTROL, hpd6);
6198         }
6199
6200         WREG32(CG_THERMAL_INT, thermal_int);
6201
6202         return 0;
6203 }
6204
6205 static inline void si_irq_ack(struct radeon_device *rdev)
6206 {
6207         u32 tmp;
6208
6209         if (ASIC_IS_NODCE(rdev))
6210                 return;
6211
6212         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6213         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6214         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6215         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6216         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6217         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6218         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
6219         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
6220         if (rdev->num_crtc >= 4) {
6221                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
6222                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
6223         }
6224         if (rdev->num_crtc >= 6) {
6225                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
6226                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
6227         }
6228
6229         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
6230                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6231         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
6232                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6233         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
6234                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6235         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
6236                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6237         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6238                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6239         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6240                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6241
6242         if (rdev->num_crtc >= 4) {
6243                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
6244                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6245                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
6246                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6247                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6248                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6249                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6250                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6251                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6252                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6253                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6254                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6255         }
6256
6257         if (rdev->num_crtc >= 6) {
6258                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6259                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6260                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6261                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6262                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6263                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6264                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6265                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6266                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6267                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6268                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6269                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6270         }
6271
6272         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6273                 tmp = RREG32(DC_HPD1_INT_CONTROL);
6274                 tmp |= DC_HPDx_INT_ACK;
6275                 WREG32(DC_HPD1_INT_CONTROL, tmp);
6276         }
6277         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6278                 tmp = RREG32(DC_HPD2_INT_CONTROL);
6279                 tmp |= DC_HPDx_INT_ACK;
6280                 WREG32(DC_HPD2_INT_CONTROL, tmp);
6281         }
6282         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6283                 tmp = RREG32(DC_HPD3_INT_CONTROL);
6284                 tmp |= DC_HPDx_INT_ACK;
6285                 WREG32(DC_HPD3_INT_CONTROL, tmp);
6286         }
6287         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6288                 tmp = RREG32(DC_HPD4_INT_CONTROL);
6289                 tmp |= DC_HPDx_INT_ACK;
6290                 WREG32(DC_HPD4_INT_CONTROL, tmp);
6291         }
6292         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6293                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6294                 tmp |= DC_HPDx_INT_ACK;
6295                 WREG32(DC_HPD5_INT_CONTROL, tmp);
6296         }
6297         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6298                 tmp = RREG32(DC_HPD6_INT_CONTROL);
6299                 tmp |= DC_HPDx_INT_ACK;
6300                 WREG32(DC_HPD6_INT_CONTROL, tmp);
6301         }
6302 }
6303
6304 static void si_irq_disable(struct radeon_device *rdev)
6305 {
6306         si_disable_interrupts(rdev);
6307         /* Wait and acknowledge irq */
6308         mdelay(1);
6309         si_irq_ack(rdev);
6310         si_disable_interrupt_state(rdev);
6311 }
6312
6313 static void si_irq_suspend(struct radeon_device *rdev)
6314 {
6315         si_irq_disable(rdev);
6316         si_rlc_stop(rdev);
6317 }
6318
6319 static void si_irq_fini(struct radeon_device *rdev)
6320 {
6321         si_irq_suspend(rdev);
6322         r600_ih_ring_fini(rdev);
6323 }
6324
6325 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6326 {
6327         u32 wptr, tmp;
6328
6329         if (rdev->wb.enabled)
6330                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6331         else
6332                 wptr = RREG32(IH_RB_WPTR);
6333
6334         if (wptr & RB_OVERFLOW) {
6335                 wptr &= ~RB_OVERFLOW;
6336                 /* When a ring buffer overflow happens, start parsing interrupts
6337                  * from the last not-overwritten vector (wptr + 16). Hopefully
6338                  * this should allow us to catch up.
6339                  */
6340                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6341                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6342                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6343                 tmp = RREG32(IH_RB_CNTL);
6344                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6345                 WREG32(IH_RB_CNTL, tmp);
6346         }
6347         return (wptr & rdev->ih.ptr_mask);
6348 }
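/*
 * When the RB_OVERFLOW bit is set in the write pointer, the code above skips
 * rptr forward to (wptr + 16) so parsing resumes at the oldest entry that was
 * not overwritten, acknowledges the overflow via IH_WPTR_OVERFLOW_CLEAR, and
 * still returns the wptr masked to the ring size.  One 16-byte step equals a
 * single IV ring entry (see the layout comment below).
 */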
6349
6350 /*        SI IV Ring
6351  * Each IV ring entry is 128 bits:
6352  * [7:0]    - interrupt source id
6353  * [31:8]   - reserved
6354  * [59:32]  - interrupt source data
6355  * [63:60]  - reserved
6356  * [71:64]  - RINGID
6357  * [79:72]  - VMID
6358  * [127:80] - reserved
6359  */
6360 int si_irq_process(struct radeon_device *rdev)
6361 {
6362         u32 wptr;
6363         u32 rptr;
6364         u32 src_id, src_data, ring_id;
6365         u32 ring_index;
6366         bool queue_hotplug = false;
6367         bool queue_thermal = false;
6368         u32 status, addr;
6369
6370         if (!rdev->ih.enabled || rdev->shutdown)
6371                 return IRQ_NONE;
6372
6373         wptr = si_get_ih_wptr(rdev);
6374
6375 restart_ih:
6376         /* is somebody else already processing irqs? */
6377         if (atomic_xchg(&rdev->ih.lock, 1))
6378                 return IRQ_NONE;
6379
6380         rptr = rdev->ih.rptr;
6381         DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6382
6383         /* Order reading of wptr vs. reading of IH ring data */
6384         rmb();
6385
6386         /* display interrupts */
6387         si_irq_ack(rdev);
6388
6389         while (rptr != wptr) {
6390                 /* wptr/rptr are in bytes! */
6391                 ring_index = rptr / 4;
6392                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6393                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6394                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6395
6396                 switch (src_id) {
6397                 case 1: /* D1 vblank/vline */
6398                         switch (src_data) {
6399                         case 0: /* D1 vblank */
6400                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
6401                                         if (rdev->irq.crtc_vblank_int[0]) {
6402                                                 drm_handle_vblank(rdev->ddev, 0);
6403                                                 rdev->pm.vblank_sync = true;
6404                                                 wake_up(&rdev->irq.vblank_queue);
6405                                         }
6406                                         if (atomic_read(&rdev->irq.pflip[0]))
6407                                                 radeon_crtc_handle_vblank(rdev, 0);
6408                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6409                                         DRM_DEBUG("IH: D1 vblank\n");
6410                                 }
6411                                 break;
6412                         case 1: /* D1 vline */
6413                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
6414                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6415                                         DRM_DEBUG("IH: D1 vline\n");
6416                                 }
6417                                 break;
6418                         default:
6419                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6420                                 break;
6421                         }
6422                         break;
6423                 case 2: /* D2 vblank/vline */
6424                         switch (src_data) {
6425                         case 0: /* D2 vblank */
6426                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6427                                         if (rdev->irq.crtc_vblank_int[1]) {
6428                                                 drm_handle_vblank(rdev->ddev, 1);
6429                                                 rdev->pm.vblank_sync = true;
6430                                                 wake_up(&rdev->irq.vblank_queue);
6431                                         }
6432                                         if (atomic_read(&rdev->irq.pflip[1]))
6433                                                 radeon_crtc_handle_vblank(rdev, 1);
6434                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6435                                         DRM_DEBUG("IH: D2 vblank\n");
6436                                 }
6437                                 break;
6438                         case 1: /* D2 vline */
6439                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6440                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6441                                         DRM_DEBUG("IH: D2 vline\n");
6442                                 }
6443                                 break;
6444                         default:
6445                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6446                                 break;
6447                         }
6448                         break;
6449                 case 3: /* D3 vblank/vline */
6450                         switch (src_data) {
6451                         case 0: /* D3 vblank */
6452                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6453                                         if (rdev->irq.crtc_vblank_int[2]) {
6454                                                 drm_handle_vblank(rdev->ddev, 2);
6455                                                 rdev->pm.vblank_sync = true;
6456                                                 wake_up(&rdev->irq.vblank_queue);
6457                                         }
6458                                         if (atomic_read(&rdev->irq.pflip[2]))
6459                                                 radeon_crtc_handle_vblank(rdev, 2);
6460                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6461                                         DRM_DEBUG("IH: D3 vblank\n");
6462                                 }
6463                                 break;
6464                         case 1: /* D3 vline */
6465                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6466                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6467                                         DRM_DEBUG("IH: D3 vline\n");
6468                                 }
6469                                 break;
6470                         default:
6471                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6472                                 break;
6473                         }
6474                         break;
6475                 case 4: /* D4 vblank/vline */
6476                         switch (src_data) {
6477                         case 0: /* D4 vblank */
6478                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6479                                         if (rdev->irq.crtc_vblank_int[3]) {
6480                                                 drm_handle_vblank(rdev->ddev, 3);
6481                                                 rdev->pm.vblank_sync = true;
6482                                                 wake_up(&rdev->irq.vblank_queue);
6483                                         }
6484                                         if (atomic_read(&rdev->irq.pflip[3]))
6485                                                 radeon_crtc_handle_vblank(rdev, 3);
6486                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6487                                         DRM_DEBUG("IH: D4 vblank\n");
6488                                 }
6489                                 break;
6490                         case 1: /* D4 vline */
6491                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6492                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6493                                         DRM_DEBUG("IH: D4 vline\n");
6494                                 }
6495                                 break;
6496                         default:
6497                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6498                                 break;
6499                         }
6500                         break;
6501                 case 5: /* D5 vblank/vline */
6502                         switch (src_data) {
6503                         case 0: /* D5 vblank */
6504                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6505                                         if (rdev->irq.crtc_vblank_int[4]) {
6506                                                 drm_handle_vblank(rdev->ddev, 4);
6507                                                 rdev->pm.vblank_sync = true;
6508                                                 wake_up(&rdev->irq.vblank_queue);
6509                                         }
6510                                         if (atomic_read(&rdev->irq.pflip[4]))
6511                                                 radeon_crtc_handle_vblank(rdev, 4);
6512                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6513                                         DRM_DEBUG("IH: D5 vblank\n");
6514                                 }
6515                                 break;
6516                         case 1: /* D5 vline */
6517                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6518                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6519                                         DRM_DEBUG("IH: D5 vline\n");
6520                                 }
6521                                 break;
6522                         default:
6523                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6524                                 break;
6525                         }
6526                         break;
6527                 case 6: /* D6 vblank/vline */
6528                         switch (src_data) {
6529                         case 0: /* D6 vblank */
6530                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6531                                         if (rdev->irq.crtc_vblank_int[5]) {
6532                                                 drm_handle_vblank(rdev->ddev, 5);
6533                                                 rdev->pm.vblank_sync = true;
6534                                                 wake_up(&rdev->irq.vblank_queue);
6535                                         }
6536                                         if (atomic_read(&rdev->irq.pflip[5]))
6537                                                 radeon_crtc_handle_vblank(rdev, 5);
6538                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6539                                         DRM_DEBUG("IH: D6 vblank\n");
6540                                 }
6541                                 break;
6542                         case 1: /* D6 vline */
6543                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6544                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6545                                         DRM_DEBUG("IH: D6 vline\n");
6546                                 }
6547                                 break;
6548                         default:
6549                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6550                                 break;
6551                         }
6552                         break;
6553                 case 8: /* D1 page flip */
6554                 case 10: /* D2 page flip */
6555                 case 12: /* D3 page flip */
6556                 case 14: /* D4 page flip */
6557                 case 16: /* D5 page flip */
6558                 case 18: /* D6 page flip */
6559                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6560                         if (radeon_use_pflipirq > 0)
6561                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6562                         break;
6563                 case 42: /* HPD hotplug */
6564                         switch (src_data) {
6565                         case 0:
6566                                 if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6567                                         rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6568                                         queue_hotplug = true;
6569                                         DRM_DEBUG("IH: HPD1\n");
6570                                 }
6571                                 break;
6572                         case 1:
6573                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6574                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6575                                         queue_hotplug = true;
6576                                         DRM_DEBUG("IH: HPD2\n");
6577                                 }
6578                                 break;
6579                         case 2:
6580                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6581                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6582                                         queue_hotplug = true;
6583                                         DRM_DEBUG("IH: HPD3\n");
6584                                 }
6585                                 break;
6586                         case 3:
6587                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6588                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6589                                         queue_hotplug = true;
6590                                         DRM_DEBUG("IH: HPD4\n");
6591                                 }
6592                                 break;
6593                         case 4:
6594                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6595                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6596                                         queue_hotplug = true;
6597                                         DRM_DEBUG("IH: HPD5\n");
6598                                 }
6599                                 break;
6600                         case 5:
6601                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6602                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6603                                         queue_hotplug = true;
6604                                         DRM_DEBUG("IH: HPD6\n");
6605                                 }
6606                                 break;
6607                         default:
6608                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6609                                 break;
6610                         }
6611                         break;
6612                 case 124: /* UVD */
6613                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6614                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6615                         break;
6616                 case 146:
6617                 case 147:
6618                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6619                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6620                         /* reset addr and status */
6621                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6622                         if (addr == 0x0 && status == 0x0)
6623                                 break;
6624                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6625                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6626                                 addr);
6627                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6628                                 status);
6629                         si_vm_decode_fault(rdev, status, addr);
6630                         break;
6631                 case 176: /* RINGID0 CP_INT */
6632                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6633                         break;
6634                 case 177: /* RINGID1 CP_INT */
6635                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6636                         break;
6637                 case 178: /* RINGID2 CP_INT */
6638                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6639                         break;
6640                 case 181: /* CP EOP event */
6641                         DRM_DEBUG("IH: CP EOP\n");
6642                         switch (ring_id) {
6643                         case 0:
6644                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6645                                 break;
6646                         case 1:
6647                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6648                                 break;
6649                         case 2:
6650                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6651                                 break;
6652                         }
6653                         break;
6654                 case 224: /* DMA trap event */
6655                         DRM_DEBUG("IH: DMA trap\n");
6656                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6657                         break;
6658                 case 230: /* thermal low to high */
6659                         DRM_DEBUG("IH: thermal low to high\n");
6660                         rdev->pm.dpm.thermal.high_to_low = false;
6661                         queue_thermal = true;
6662                         break;
6663                 case 231: /* thermal high to low */
6664                         DRM_DEBUG("IH: thermal high to low\n");
6665                         rdev->pm.dpm.thermal.high_to_low = true;
6666                         queue_thermal = true;
6667                         break;
6668                 case 233: /* GUI IDLE */
6669                         DRM_DEBUG("IH: GUI idle\n");
6670                         break;
6671                 case 244: /* DMA1 trap event */
6672                         DRM_DEBUG("IH: DMA1 trap\n");
6673                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6674                         break;
6675                 default:
6676                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6677                         break;
6678                 }
6679
6680                 /* wptr/rptr are in bytes! */
6681                 rptr += 16;
6682                 rptr &= rdev->ih.ptr_mask;
6683                 WREG32(IH_RB_RPTR, rptr);
6684         }
6685         if (queue_hotplug)
6686                 schedule_work(&rdev->hotplug_work);
6687         if (queue_thermal && rdev->pm.dpm_enabled)
6688                 schedule_work(&rdev->pm.dpm.thermal.work);
6689         rdev->ih.rptr = rptr;
6690         atomic_set(&rdev->ih.lock, 0);
6691
6692         /* make sure wptr hasn't changed while processing */
6693         wptr = si_get_ih_wptr(rdev);
6694         if (wptr != rptr)
6695                 goto restart_ih;
6696
6697         return IRQ_HANDLED;
6698 }
6699
6700 /*
6701  * startup/shutdown callbacks
6702  */
6703 static int si_startup(struct radeon_device *rdev)
6704 {
6705         struct radeon_ring *ring;
6706         int r;
6707
6708         /* enable pcie gen2/3 link */
6709         si_pcie_gen3_enable(rdev);
6710         /* enable aspm */
6711         si_program_aspm(rdev);
6712
6713         /* scratch needs to be initialized before MC */
6714         r = r600_vram_scratch_init(rdev);
6715         if (r)
6716                 return r;
6717
6718         si_mc_program(rdev);
6719
6720         if (!rdev->pm.dpm_enabled) {
6721                 r = si_mc_load_microcode(rdev);
6722                 if (r) {
6723                         DRM_ERROR("Failed to load MC firmware!\n");
6724                         return r;
6725                 }
6726         }
6727
6728         r = si_pcie_gart_enable(rdev);
6729         if (r)
6730                 return r;
6731         si_gpu_init(rdev);
6732
6733         /* allocate rlc buffers */
6734         if (rdev->family == CHIP_VERDE) {
6735                 rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
6736                 rdev->rlc.reg_list_size =
6737                         (u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
6738         }
6739         rdev->rlc.cs_data = si_cs_data;
6740         r = sumo_rlc_init(rdev);
6741         if (r) {
6742                 DRM_ERROR("Failed to init rlc BOs!\n");
6743                 return r;
6744         }
6745
6746         /* allocate wb buffer */
6747         r = radeon_wb_init(rdev);
6748         if (r)
6749                 return r;
6750
6751         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6752         if (r) {
6753                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6754                 return r;
6755         }
6756
6757         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6758         if (r) {
6759                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6760                 return r;
6761         }
6762
6763         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6764         if (r) {
6765                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6766                 return r;
6767         }
6768
6769         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6770         if (r) {
6771                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6772                 return r;
6773         }
6774
6775         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6776         if (r) {
6777                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6778                 return r;
6779         }
6780
6781         if (rdev->has_uvd) {
6782                 r = uvd_v2_2_resume(rdev);
6783                 if (!r) {
6784                         r = radeon_fence_driver_start_ring(rdev,
6785                                                            R600_RING_TYPE_UVD_INDEX);
6786                         if (r)
6787                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6788                 }
6789                 if (r)
6790                         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6791         }
6792
6793         /* Enable IRQ */
6794         if (!rdev->irq.installed) {
6795                 r = radeon_irq_kms_init(rdev);
6796                 if (r)
6797                         return r;
6798         }
6799
6800         r = si_irq_init(rdev);
6801         if (r) {
6802                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6803                 radeon_irq_kms_fini(rdev);
6804                 return r;
6805         }
6806         si_irq_set(rdev);
6807
6808         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6809         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6810                              RADEON_CP_PACKET2);
6811         if (r)
6812                 return r;
6813
6814         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6815         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6816                              RADEON_CP_PACKET2);
6817         if (r)
6818                 return r;
6819
6820         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6821         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6822                              RADEON_CP_PACKET2);
6823         if (r)
6824                 return r;
6825
6826         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6827         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6828                              DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6829         if (r)
6830                 return r;
6831
6832         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6833         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6834                              DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6835         if (r)
6836                 return r;
6837
6838         r = si_cp_load_microcode(rdev);
6839         if (r)
6840                 return r;
6841         r = si_cp_resume(rdev);
6842         if (r)
6843                 return r;
6844
6845         r = cayman_dma_resume(rdev);
6846         if (r)
6847                 return r;
6848
6849         if (rdev->has_uvd) {
6850                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6851                 if (ring->ring_size) {
6852                         r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
6853                                              RADEON_CP_PACKET2);
6854                         if (!r)
6855                                 r = uvd_v1_0_init(rdev);
6856                         if (r)
6857                                 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6858                 }
6859         }
6860
6861         r = radeon_ib_pool_init(rdev);
6862         if (r) {
6863                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6864                 return r;
6865         }
6866
6867         r = radeon_vm_manager_init(rdev);
6868         if (r) {
6869                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6870                 return r;
6871         }
6872
6873         r = radeon_audio_init(rdev);
6874         if (r)
6875                 return r;
6876
6877         return 0;
6878 }
6879
6880 int si_resume(struct radeon_device *rdev)
6881 {
6882         int r;
6883
6884         /* Do not reset the GPU before posting; on rv770 hw, unlike on
6885          * r500 hw, posting will perform the tasks necessary to bring
6886          * the GPU back into good shape.
6887          */
6888         /* post card */
6889         atom_asic_init(rdev->mode_info.atom_context);
6890
6891         /* init golden registers */
6892         si_init_golden_registers(rdev);
6893
6894         if (rdev->pm.pm_method == PM_METHOD_DPM)
6895                 radeon_pm_resume(rdev);
6896
6897         rdev->accel_working = true;
6898         r = si_startup(rdev);
6899         if (r) {
6900                 DRM_ERROR("si startup failed on resume\n");
6901                 rdev->accel_working = false;
6902                 return r;
6903         }
6904
6905         return r;
6906
6907 }
6908
6909 int si_suspend(struct radeon_device *rdev)
6910 {
6911         radeon_pm_suspend(rdev);
6912         radeon_audio_fini(rdev);
6913         radeon_vm_manager_fini(rdev);
6914         si_cp_enable(rdev, false);
6915         cayman_dma_stop(rdev);
6916         if (rdev->has_uvd) {
6917                 uvd_v1_0_fini(rdev);
6918                 radeon_uvd_suspend(rdev);
6919         }
6920         si_fini_pg(rdev);
6921         si_fini_cg(rdev);
6922         si_irq_suspend(rdev);
6923         radeon_wb_disable(rdev);
6924         si_pcie_gart_disable(rdev);
6925         return 0;
6926 }
6927
6928 /* The plan is to move initialization into this function and to
6929  * use helper functions so that radeon_device_init does little
6930  * more than call ASIC-specific functions. This should also
6931  * allow us to remove a bunch of callback functions like
6932  * vram_info.
6933  */
6934 int si_init(struct radeon_device *rdev)
6935 {
6936         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6937         int r;
6938
6939         /* Read BIOS */
6940         if (!radeon_get_bios(rdev)) {
6941                 if (ASIC_IS_AVIVO(rdev))
6942                         return -EINVAL;
6943         }
6944         /* Must be an ATOMBIOS */
6945         if (!rdev->is_atom_bios) {
6946                 dev_err(rdev->dev, "Expecting atombios for SI GPU\n");
6947                 return -EINVAL;
6948         }
6949         r = radeon_atombios_init(rdev);
6950         if (r)
6951                 return r;
6952
6953         /* Post card if necessary */
6954         if (!radeon_card_posted(rdev)) {
6955                 if (!rdev->bios) {
6956                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6957                         return -EINVAL;
6958                 }
6959                 DRM_INFO("GPU not posted. posting now...\n");
6960                 atom_asic_init(rdev->mode_info.atom_context);
6961         }
6962         /* init golden registers */
6963         si_init_golden_registers(rdev);
6964         /* Initialize scratch registers */
6965         si_scratch_init(rdev);
6966         /* Initialize surface registers */
6967         radeon_surface_init(rdev);
6968         /* Initialize clocks */
6969         radeon_get_clock_info(rdev->ddev);
6970
6971         /* Fence driver */
6972         r = radeon_fence_driver_init(rdev);
6973         if (r)
6974                 return r;
6975
6976         /* initialize memory controller */
6977         r = si_mc_init(rdev);
6978         if (r)
6979                 return r;
6980         /* Memory manager */
6981         r = radeon_bo_init(rdev);
6982         if (r)
6983                 return r;
6984
6985         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6986             !rdev->rlc_fw || !rdev->mc_fw) {
6987                 r = si_init_microcode(rdev);
6988                 if (r) {
6989                         DRM_ERROR("Failed to load firmware!\n");
6990                         return r;
6991                 }
6992         }
6993
6994         /* Initialize power management */
6995         radeon_pm_init(rdev);
6996
6997         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6998         ring->ring_obj = NULL;
6999         r600_ring_init(rdev, ring, 1024 * 1024);
7000
7001         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7002         ring->ring_obj = NULL;
7003         r600_ring_init(rdev, ring, 1024 * 1024);
7004
7005         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7006         ring->ring_obj = NULL;
7007         r600_ring_init(rdev, ring, 1024 * 1024);
7008
7009         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7010         ring->ring_obj = NULL;
7011         r600_ring_init(rdev, ring, 64 * 1024);
7012
7013         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7014         ring->ring_obj = NULL;
7015         r600_ring_init(rdev, ring, 64 * 1024);
7016
7017         if (rdev->has_uvd) {
7018                 r = radeon_uvd_init(rdev);
7019                 if (!r) {
7020                         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7021                         ring->ring_obj = NULL;
7022                         r600_ring_init(rdev, ring, 4096);
7023                 }
7024         }
7025
7026         rdev->ih.ring_obj = NULL;
7027         r600_ih_ring_init(rdev, 64 * 1024);
7028
7029         r = r600_pcie_gart_init(rdev);
7030         if (r)
7031                 return r;
7032
7033         rdev->accel_working = true;
7034         r = si_startup(rdev);
7035         if (r) {
7036                 dev_err(rdev->dev, "disabling GPU acceleration\n");
7037                 si_cp_fini(rdev);
7038                 cayman_dma_fini(rdev);
7039                 si_irq_fini(rdev);
7040                 sumo_rlc_fini(rdev);
7041                 radeon_wb_fini(rdev);
7042                 radeon_ib_pool_fini(rdev);
7043                 radeon_vm_manager_fini(rdev);
7044                 radeon_irq_kms_fini(rdev);
7045                 si_pcie_gart_fini(rdev);
7046                 rdev->accel_working = false;
7047         }
7048
7049         /* Don't start up if the MC ucode is missing.
7050          * The default clocks and voltages before the MC ucode
7051          * is loaded are not sufficient for advanced operations.
7052          */
7053         if (!rdev->mc_fw) {
7054                 DRM_ERROR("radeon: MC ucode required for SI+.\n");
7055                 return -EINVAL;
7056         }
7057
7058         return 0;
7059 }
7060
7061 void si_fini(struct radeon_device *rdev)
7062 {
7063         radeon_pm_fini(rdev);
7064         si_cp_fini(rdev);
7065         cayman_dma_fini(rdev);
7066         si_fini_pg(rdev);
7067         si_fini_cg(rdev);
7068         si_irq_fini(rdev);
7069         sumo_rlc_fini(rdev);
7070         radeon_wb_fini(rdev);
7071         radeon_vm_manager_fini(rdev);
7072         radeon_ib_pool_fini(rdev);
7073         radeon_irq_kms_fini(rdev);
7074         if (rdev->has_uvd) {
7075                 uvd_v1_0_fini(rdev);
7076                 radeon_uvd_fini(rdev);
7077         }
7078         si_pcie_gart_fini(rdev);
7079         r600_vram_scratch_fini(rdev);
7080         radeon_gem_fini(rdev);
7081         radeon_fence_driver_fini(rdev);
7082         radeon_bo_fini(rdev);
7083         radeon_atombios_fini(rdev);
7084         kfree(rdev->bios);
7085         rdev->bios = NULL;
7086 }
7087
7088 /**
7089  * si_get_gpu_clock_counter - return GPU clock counter snapshot
7090  *
7091  * @rdev: radeon_device pointer
7092  *
7093  * Fetches a GPU clock counter snapshot (SI).
7094  * Returns the 64 bit clock counter snapshot.
7095  */
7096 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
7097 {
7098         uint64_t clock;
7099
7100         mutex_lock(&rdev->gpu_clock_mutex);
7101         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7102         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7103                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7104         mutex_unlock(&rdev->gpu_clock_mutex);
7105         return clock;
7106 }
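
/* A minimal usage sketch, not part of the driver: timing a workload in GPU
 * clock ticks with two snapshots of the counter.  The helper name is made up
 * for illustration; since the counter is 64-bit, wraparound is not a
 * practical concern over such an interval.
 */
static inline uint64_t si_gpu_clock_interval_example(struct radeon_device *rdev)
{
        uint64_t start = si_get_gpu_clock_counter(rdev);

        /* ... the work being measured would run here ... */

        return si_get_gpu_clock_counter(rdev) - start;
}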
7107
7108 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
7109 {
7110         unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
7111         int r;
7112
7113         /* bypass vclk and dclk with bclk */
7114         WREG32_P(CG_UPLL_FUNC_CNTL_2,
7115                 VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
7116                 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7117
7118         /* put PLL in bypass mode */
7119         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
7120
7121         if (!vclk || !dclk) {
7122                 /* keep the Bypass mode, put PLL to sleep */
7123                 WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
7124                 return 0;
7125         }
7126
7127         r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
7128                                           16384, 0x03FFFFFF, 0, 128, 5,
7129                                           &fb_div, &vclk_div, &dclk_div);
7130         if (r)
7131                 return r;
7132
7133         /* set RESET_ANTI_MUX to 0 */
7134         WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7135
7136         /* set VCO_MODE to 1 */
7137         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
7138
7139         /* toggle UPLL_SLEEP to 1 then back to 0 */
7140         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
7141         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
7142
7143         /* deassert UPLL_RESET */
7144         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7145
7146         mdelay(1);
7147
7148         r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7149         if (r)
7150                 return r;
7151
7152         /* assert UPLL_RESET again */
7153         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
7154
7155         /* disable spread spectrum. */
7156         WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7157
7158         /* set feedback divider */
7159         WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
7160
7161         /* set ref divider to 0 */
7162         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
7163
7164         if (fb_div < 307200)
7165                 WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
7166         else
7167                 WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
7168
7169         /* set PDIV_A and PDIV_B */
7170         WREG32_P(CG_UPLL_FUNC_CNTL_2,
7171                 UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
7172                 ~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
7173
7174         /* give the PLL some time to settle */
7175         mdelay(15);
7176
7177         /* deassert PLL_RESET */
7178         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7179
7180         mdelay(15);
7181
7182         /* switch from bypass mode to normal mode */
7183         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
7184
7185         r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7186         if (r)
7187                 return r;
7188
7189         /* switch VCLK and DCLK selection */
7190         WREG32_P(CG_UPLL_FUNC_CNTL_2,
7191                 VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
7192                 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7193
7194         mdelay(100);
7195
7196         return 0;
7197 }
7198
7199 static void si_pcie_gen3_enable(struct radeon_device *rdev)
7200 {
7201         struct pci_dev *root = rdev->pdev->bus->self;
7202         int bridge_pos, gpu_pos;
7203         u32 speed_cntl, mask, current_data_rate;
7204         int ret, i;
7205         u16 tmp16;
7206
7207         if (pci_is_root_bus(rdev->pdev->bus))
7208                 return;
7209
7210         if (radeon_pcie_gen2 == 0)
7211                 return;
7212
7213         if (rdev->flags & RADEON_IS_IGP)
7214                 return;
7215
7216         if (!(rdev->flags & RADEON_IS_PCIE))
7217                 return;
7218
7219         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
7220         if (ret != 0)
7221                 return;
7222
7223         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
7224                 return;
7225
7226         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7227         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7228                 LC_CURRENT_DATA_RATE_SHIFT;
7229         if (mask & DRM_PCIE_SPEED_80) {
7230                 if (current_data_rate == 2) {
7231                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7232                         return;
7233                 }
7234                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7235         } else if (mask & DRM_PCIE_SPEED_50) {
7236                 if (current_data_rate == 1) {
7237                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7238                         return;
7239                 }
7240                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7241         }
7242
7243         bridge_pos = pci_pcie_cap(root);
7244         if (!bridge_pos)
7245                 return;
7246
7247         gpu_pos = pci_pcie_cap(rdev->pdev);
7248         if (!gpu_pos)
7249                 return;
7250
7251         if (mask & DRM_PCIE_SPEED_80) {
7252                 /* re-try equalization if gen3 is not already enabled */
7253                 if (current_data_rate != 2) {
7254                         u16 bridge_cfg, gpu_cfg;
7255                         u16 bridge_cfg2, gpu_cfg2;
7256                         u32 max_lw, current_lw, tmp;
7257
7258                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7259                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7260
7261                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7262                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7263
7264                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7265                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7266
7267                         tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7268                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7269                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7270
7271                         if (current_lw < max_lw) {
7272                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7273                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
7274                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7275                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7276                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7277                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7278                                 }
7279                         }
7280
7281                         for (i = 0; i < 10; i++) {
7282                                 /* check status */
7283                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
7284                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7285                                         break;
7286
7287                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7288                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7289
7290                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
7291                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
7292
7293                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7294                                 tmp |= LC_SET_QUIESCE;
7295                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7296
7297                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7298                                 tmp |= LC_REDO_EQ;
7299                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7300
7301                                 mdelay(100);
7302
7303                                 /* linkctl */
7304                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
7305                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7306                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7307                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7308
7309                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7310                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7311                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7312                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7313
7314                                 /* linkctl2 */
7315                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7316                                 tmp16 &= ~((1 << 4) | (7 << 9));
7317                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
7318                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
7319
7320                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7321                                 tmp16 &= ~((1 << 4) | (7 << 9));
7322                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
7323                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7324
7325                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7326                                 tmp &= ~LC_SET_QUIESCE;
7327                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7328                         }
7329                 }
7330         }
7331
7332         /* set the link speed */
7333         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7334         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7335         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7336
7337         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7338         tmp16 &= ~0xf;
7339         if (mask & DRM_PCIE_SPEED_80)
7340                 tmp16 |= 3; /* gen3 */
7341         else if (mask & DRM_PCIE_SPEED_50)
7342                 tmp16 |= 2; /* gen2 */
7343         else
7344                 tmp16 |= 1; /* gen1 */
7345         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7346
7347         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7348         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7349         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7350
7351         for (i = 0; i < rdev->usec_timeout; i++) {
7352                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7353                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7354                         break;
7355                 udelay(1);
7356         }
7357 }
7358
7359 static void si_program_aspm(struct radeon_device *rdev)
7360 {
7361         u32 data, orig;
7362         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7363         bool disable_clkreq = false;
7364
7365         if (radeon_aspm == 0)
7366                 return;
7367
7368         if (!(rdev->flags & RADEON_IS_PCIE))
7369                 return;
7370
7371         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7372         data &= ~LC_XMIT_N_FTS_MASK;
7373         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7374         if (orig != data)
7375                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7376
7377         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7378         data |= LC_GO_TO_RECOVERY;
7379         if (orig != data)
7380                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7381
7382         orig = data = RREG32_PCIE(PCIE_P_CNTL);
7383         data |= P_IGNORE_EDB_ERR;
7384         if (orig != data)
7385                 WREG32_PCIE(PCIE_P_CNTL, data);
7386
7387         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7388         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7389         data |= LC_PMI_TO_L1_DIS;
7390         if (!disable_l0s)
7391                 data |= LC_L0S_INACTIVITY(7);
7392
7393         if (!disable_l1) {
7394                 data |= LC_L1_INACTIVITY(7);
7395                 data &= ~LC_PMI_TO_L1_DIS;
7396                 if (orig != data)
7397                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7398
7399                 if (!disable_plloff_in_l1) {
7400                         bool clk_req_support;
7401
7402                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7403                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7404                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7405                         if (orig != data)
7406                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7407
7408                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7409                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7410                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7411                         if (orig != data)
7412                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7413
7414                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7415                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7416                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7417                         if (orig != data)
7418                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7419
7420                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7421                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7422                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7423                         if (orig != data)
7424                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7425
7426                         if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7427                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7428                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7429                                 if (orig != data)
7430                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7431
7432                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7433                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7434                                 if (orig != data)
7435                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7436
7437                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7438                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7439                                 if (orig != data)
7440                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7441
7442                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7443                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7444                                 if (orig != data)
7445                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7446
7447                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7448                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7449                                 if (orig != data)
7450                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7451
7452                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7453                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7454                                 if (orig != data)
7455                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7456
7457                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7458                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7459                                 if (orig != data)
7460                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7461
7462                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7463                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7464                                 if (orig != data)
7465                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7466                         }
7467                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7468                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7469                         data |= LC_DYN_LANES_PWR_STATE(3);
7470                         if (orig != data)
7471                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7472
7473                         orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7474                         data &= ~LS2_EXIT_TIME_MASK;
7475                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7476                                 data |= LS2_EXIT_TIME(5);
7477                         if (orig != data)
7478                                 WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7479
7480                         orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7481                         data &= ~LS2_EXIT_TIME_MASK;
7482                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7483                                 data |= LS2_EXIT_TIME(5);
7484                         if (orig != data)
7485                                 WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7486
7487                         if (!disable_clkreq &&
7488                             !pci_is_root_bus(rdev->pdev->bus)) {
7489                                 struct pci_dev *root = rdev->pdev->bus->self;
7490                                 u32 lnkcap;
7491
7492                                 clk_req_support = false;
7493                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7494                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7495                                         clk_req_support = true;
7496                         } else {
7497                                 clk_req_support = false;
7498                         }
7499
7500                         if (clk_req_support) {
7501                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7502                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7503                                 if (orig != data)
7504                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7505
7506                                 orig = data = RREG32(THM_CLK_CNTL);
7507                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7508                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7509                                 if (orig != data)
7510                                         WREG32(THM_CLK_CNTL, data);
7511
7512                                 orig = data = RREG32(MISC_CLK_CNTL);
7513                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7514                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7515                                 if (orig != data)
7516                                         WREG32(MISC_CLK_CNTL, data);
7517
7518                                 orig = data = RREG32(CG_CLKPIN_CNTL);
7519                                 data &= ~BCLK_AS_XCLK;
7520                                 if (orig != data)
7521                                         WREG32(CG_CLKPIN_CNTL, data);
7522
7523                                 orig = data = RREG32(CG_CLKPIN_CNTL_2);
7524                                 data &= ~FORCE_BIF_REFCLK_EN;
7525                                 if (orig != data)
7526                                         WREG32(CG_CLKPIN_CNTL_2, data);
7527
7528                                 orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7529                                 data &= ~MPLL_CLKOUT_SEL_MASK;
7530                                 data |= MPLL_CLKOUT_SEL(4);
7531                                 if (orig != data)
7532                                         WREG32(MPLL_BYPASSCLK_SEL, data);
7533
7534                                 orig = data = RREG32(SPLL_CNTL_MODE);
7535                                 data &= ~SPLL_REFCLK_SEL_MASK;
7536                                 if (orig != data)
7537                                         WREG32(SPLL_CNTL_MODE, data);
7538                         }
7539                 }
7540         } else {
7541                 if (orig != data)
7542                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7543         }
7544
7545         orig = data = RREG32_PCIE(PCIE_CNTL2);
7546         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7547         if (orig != data)
7548                 WREG32_PCIE(PCIE_CNTL2, data);
7549
7550         if (!disable_l0s) {
7551                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7552                 if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7553                         data = RREG32_PCIE(PCIE_LC_STATUS1);
7554                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7555                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7556                                 data &= ~LC_L0S_INACTIVITY_MASK;
7557                                 if (orig != data)
7558                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7559                         }
7560                 }
7561         }
7562 }