drm/radeon: make VM size a module parameter (v2)
[linux-2.6-block.git] drivers/gpu/drm/radeon/si.c
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include <drm/radeon_drm.h>
31 #include "sid.h"
32 #include "atom.h"
33 #include "si_blit_shaders.h"
34 #include "clearstate_si.h"
35 #include "radeon_ucode.h"
36
37
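/* CP (pfp/me/ce), MC, RLC and SMC microcode images that may be requested
 * for the supported Southern Islands (SI) ASICs.
 */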
38 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
39 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
40 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
41 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
42 MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
43 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
44 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
45 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
46 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
47 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
48 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
49 MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
50 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
51 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
52 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
53 MODULE_FIRMWARE("radeon/VERDE_me.bin");
54 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
55 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
56 MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
57 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
58 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
59 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
60 MODULE_FIRMWARE("radeon/OLAND_me.bin");
61 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
62 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
63 MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
64 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
65 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
66 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
67 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
68 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
69 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
70 MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
71 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
72 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
73
74 static void si_pcie_gen3_enable(struct radeon_device *rdev);
75 static void si_program_aspm(struct radeon_device *rdev);
76 extern void sumo_rlc_fini(struct radeon_device *rdev);
77 extern int sumo_rlc_init(struct radeon_device *rdev);
78 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
79 extern void r600_ih_ring_fini(struct radeon_device *rdev);
80 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
81 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
82 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
83 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
84 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
85 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
86 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
87                                          bool enable);
88 static void si_init_pg(struct radeon_device *rdev);
89 static void si_init_cg(struct radeon_device *rdev);
90 static void si_fini_pg(struct radeon_device *rdev);
91 static void si_fini_cg(struct radeon_device *rdev);
92 static void si_rlc_stop(struct radeon_device *rdev);
93
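/*
 * Register list for the VERDE RLC save/restore buffer; it is copied verbatim
 * into the buffer at init (see sumo_rlc_init()) and used for GFX power gating.
 */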
94 static const u32 verde_rlc_save_restore_register_list[] =
95 {
96         (0x8000 << 16) | (0x98f4 >> 2),
97         0x00000000,
98         (0x8040 << 16) | (0x98f4 >> 2),
99         0x00000000,
100         (0x8000 << 16) | (0xe80 >> 2),
101         0x00000000,
102         (0x8040 << 16) | (0xe80 >> 2),
103         0x00000000,
104         (0x8000 << 16) | (0x89bc >> 2),
105         0x00000000,
106         (0x8040 << 16) | (0x89bc >> 2),
107         0x00000000,
108         (0x8000 << 16) | (0x8c1c >> 2),
109         0x00000000,
110         (0x8040 << 16) | (0x8c1c >> 2),
111         0x00000000,
112         (0x9c00 << 16) | (0x98f0 >> 2),
113         0x00000000,
114         (0x9c00 << 16) | (0xe7c >> 2),
115         0x00000000,
116         (0x8000 << 16) | (0x9148 >> 2),
117         0x00000000,
118         (0x8040 << 16) | (0x9148 >> 2),
119         0x00000000,
120         (0x9c00 << 16) | (0x9150 >> 2),
121         0x00000000,
122         (0x9c00 << 16) | (0x897c >> 2),
123         0x00000000,
124         (0x9c00 << 16) | (0x8d8c >> 2),
125         0x00000000,
126         (0x9c00 << 16) | (0xac54 >> 2),
127         0x00000000,
128         0x3,
129         (0x9c00 << 16) | (0x98f8 >> 2),
130         0x00000000,
131         (0x9c00 << 16) | (0x9910 >> 2),
132         0x00000000,
133         (0x9c00 << 16) | (0x9914 >> 2),
134         0x00000000,
135         (0x9c00 << 16) | (0x9918 >> 2),
136         0x00000000,
137         (0x9c00 << 16) | (0x991c >> 2),
138         0x00000000,
139         (0x9c00 << 16) | (0x9920 >> 2),
140         0x00000000,
141         (0x9c00 << 16) | (0x9924 >> 2),
142         0x00000000,
143         (0x9c00 << 16) | (0x9928 >> 2),
144         0x00000000,
145         (0x9c00 << 16) | (0x992c >> 2),
146         0x00000000,
147         (0x9c00 << 16) | (0x9930 >> 2),
148         0x00000000,
149         (0x9c00 << 16) | (0x9934 >> 2),
150         0x00000000,
151         (0x9c00 << 16) | (0x9938 >> 2),
152         0x00000000,
153         (0x9c00 << 16) | (0x993c >> 2),
154         0x00000000,
155         (0x9c00 << 16) | (0x9940 >> 2),
156         0x00000000,
157         (0x9c00 << 16) | (0x9944 >> 2),
158         0x00000000,
159         (0x9c00 << 16) | (0x9948 >> 2),
160         0x00000000,
161         (0x9c00 << 16) | (0x994c >> 2),
162         0x00000000,
163         (0x9c00 << 16) | (0x9950 >> 2),
164         0x00000000,
165         (0x9c00 << 16) | (0x9954 >> 2),
166         0x00000000,
167         (0x9c00 << 16) | (0x9958 >> 2),
168         0x00000000,
169         (0x9c00 << 16) | (0x995c >> 2),
170         0x00000000,
171         (0x9c00 << 16) | (0x9960 >> 2),
172         0x00000000,
173         (0x9c00 << 16) | (0x9964 >> 2),
174         0x00000000,
175         (0x9c00 << 16) | (0x9968 >> 2),
176         0x00000000,
177         (0x9c00 << 16) | (0x996c >> 2),
178         0x00000000,
179         (0x9c00 << 16) | (0x9970 >> 2),
180         0x00000000,
181         (0x9c00 << 16) | (0x9974 >> 2),
182         0x00000000,
183         (0x9c00 << 16) | (0x9978 >> 2),
184         0x00000000,
185         (0x9c00 << 16) | (0x997c >> 2),
186         0x00000000,
187         (0x9c00 << 16) | (0x9980 >> 2),
188         0x00000000,
189         (0x9c00 << 16) | (0x9984 >> 2),
190         0x00000000,
191         (0x9c00 << 16) | (0x9988 >> 2),
192         0x00000000,
193         (0x9c00 << 16) | (0x998c >> 2),
194         0x00000000,
195         (0x9c00 << 16) | (0x8c00 >> 2),
196         0x00000000,
197         (0x9c00 << 16) | (0x8c14 >> 2),
198         0x00000000,
199         (0x9c00 << 16) | (0x8c04 >> 2),
200         0x00000000,
201         (0x9c00 << 16) | (0x8c08 >> 2),
202         0x00000000,
203         (0x8000 << 16) | (0x9b7c >> 2),
204         0x00000000,
205         (0x8040 << 16) | (0x9b7c >> 2),
206         0x00000000,
207         (0x8000 << 16) | (0xe84 >> 2),
208         0x00000000,
209         (0x8040 << 16) | (0xe84 >> 2),
210         0x00000000,
211         (0x8000 << 16) | (0x89c0 >> 2),
212         0x00000000,
213         (0x8040 << 16) | (0x89c0 >> 2),
214         0x00000000,
215         (0x8000 << 16) | (0x914c >> 2),
216         0x00000000,
217         (0x8040 << 16) | (0x914c >> 2),
218         0x00000000,
219         (0x8000 << 16) | (0x8c20 >> 2),
220         0x00000000,
221         (0x8040 << 16) | (0x8c20 >> 2),
222         0x00000000,
223         (0x8000 << 16) | (0x9354 >> 2),
224         0x00000000,
225         (0x8040 << 16) | (0x9354 >> 2),
226         0x00000000,
227         (0x9c00 << 16) | (0x9060 >> 2),
228         0x00000000,
229         (0x9c00 << 16) | (0x9364 >> 2),
230         0x00000000,
231         (0x9c00 << 16) | (0x9100 >> 2),
232         0x00000000,
233         (0x9c00 << 16) | (0x913c >> 2),
234         0x00000000,
235         (0x8000 << 16) | (0x90e0 >> 2),
236         0x00000000,
237         (0x8000 << 16) | (0x90e4 >> 2),
238         0x00000000,
239         (0x8000 << 16) | (0x90e8 >> 2),
240         0x00000000,
241         (0x8040 << 16) | (0x90e0 >> 2),
242         0x00000000,
243         (0x8040 << 16) | (0x90e4 >> 2),
244         0x00000000,
245         (0x8040 << 16) | (0x90e8 >> 2),
246         0x00000000,
247         (0x9c00 << 16) | (0x8bcc >> 2),
248         0x00000000,
249         (0x9c00 << 16) | (0x8b24 >> 2),
250         0x00000000,
251         (0x9c00 << 16) | (0x88c4 >> 2),
252         0x00000000,
253         (0x9c00 << 16) | (0x8e50 >> 2),
254         0x00000000,
255         (0x9c00 << 16) | (0x8c0c >> 2),
256         0x00000000,
257         (0x9c00 << 16) | (0x8e58 >> 2),
258         0x00000000,
259         (0x9c00 << 16) | (0x8e5c >> 2),
260         0x00000000,
261         (0x9c00 << 16) | (0x9508 >> 2),
262         0x00000000,
263         (0x9c00 << 16) | (0x950c >> 2),
264         0x00000000,
265         (0x9c00 << 16) | (0x9494 >> 2),
266         0x00000000,
267         (0x9c00 << 16) | (0xac0c >> 2),
268         0x00000000,
269         (0x9c00 << 16) | (0xac10 >> 2),
270         0x00000000,
271         (0x9c00 << 16) | (0xac14 >> 2),
272         0x00000000,
273         (0x9c00 << 16) | (0xae00 >> 2),
274         0x00000000,
275         (0x9c00 << 16) | (0xac08 >> 2),
276         0x00000000,
277         (0x9c00 << 16) | (0x88d4 >> 2),
278         0x00000000,
279         (0x9c00 << 16) | (0x88c8 >> 2),
280         0x00000000,
281         (0x9c00 << 16) | (0x88cc >> 2),
282         0x00000000,
283         (0x9c00 << 16) | (0x89b0 >> 2),
284         0x00000000,
285         (0x9c00 << 16) | (0x8b10 >> 2),
286         0x00000000,
287         (0x9c00 << 16) | (0x8a14 >> 2),
288         0x00000000,
289         (0x9c00 << 16) | (0x9830 >> 2),
290         0x00000000,
291         (0x9c00 << 16) | (0x9834 >> 2),
292         0x00000000,
293         (0x9c00 << 16) | (0x9838 >> 2),
294         0x00000000,
295         (0x9c00 << 16) | (0x9a10 >> 2),
296         0x00000000,
297         (0x8000 << 16) | (0x9870 >> 2),
298         0x00000000,
299         (0x8000 << 16) | (0x9874 >> 2),
300         0x00000000,
301         (0x8001 << 16) | (0x9870 >> 2),
302         0x00000000,
303         (0x8001 << 16) | (0x9874 >> 2),
304         0x00000000,
305         (0x8040 << 16) | (0x9870 >> 2),
306         0x00000000,
307         (0x8040 << 16) | (0x9874 >> 2),
308         0x00000000,
309         (0x8041 << 16) | (0x9870 >> 2),
310         0x00000000,
311         (0x8041 << 16) | (0x9874 >> 2),
312         0x00000000,
313         0x00000000
314 };
315
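/*
 * Golden register tables: { register offset, and-mask, value } triples
 * applied by radeon_program_register_sequence() in si_init_golden_registers().
 */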
316 static const u32 tahiti_golden_rlc_registers[] =
317 {
318         0xc424, 0xffffffff, 0x00601005,
319         0xc47c, 0xffffffff, 0x10104040,
320         0xc488, 0xffffffff, 0x0100000a,
321         0xc314, 0xffffffff, 0x00000800,
322         0xc30c, 0xffffffff, 0x800000f4,
323         0xf4a8, 0xffffffff, 0x00000000
324 };
325
326 static const u32 tahiti_golden_registers[] =
327 {
328         0x9a10, 0x00010000, 0x00018208,
329         0x9830, 0xffffffff, 0x00000000,
330         0x9834, 0xf00fffff, 0x00000400,
331         0x9838, 0x0002021c, 0x00020200,
332         0xc78, 0x00000080, 0x00000000,
333         0xd030, 0x000300c0, 0x00800040,
334         0xd830, 0x000300c0, 0x00800040,
335         0x5bb0, 0x000000f0, 0x00000070,
336         0x5bc0, 0x00200000, 0x50100000,
337         0x7030, 0x31000311, 0x00000011,
338         0x277c, 0x00000003, 0x000007ff,
339         0x240c, 0x000007ff, 0x00000000,
340         0x8a14, 0xf000001f, 0x00000007,
341         0x8b24, 0xffffffff, 0x00ffffff,
342         0x8b10, 0x0000ff0f, 0x00000000,
343         0x28a4c, 0x07ffffff, 0x4e000000,
344         0x28350, 0x3f3f3fff, 0x2a00126a,
345         0x30, 0x000000ff, 0x0040,
346         0x34, 0x00000040, 0x00004040,
347         0x9100, 0x07ffffff, 0x03000000,
348         0x8e88, 0x01ff1f3f, 0x00000000,
349         0x8e84, 0x01ff1f3f, 0x00000000,
350         0x9060, 0x0000007f, 0x00000020,
351         0x9508, 0x00010000, 0x00010000,
352         0xac14, 0x00000200, 0x000002fb,
353         0xac10, 0xffffffff, 0x0000543b,
354         0xac0c, 0xffffffff, 0xa9210876,
355         0x88d0, 0xffffffff, 0x000fff40,
356         0x88d4, 0x0000001f, 0x00000010,
357         0x1410, 0x20000000, 0x20fffed8,
358         0x15c0, 0x000c0fc0, 0x000c0400
359 };
360
361 static const u32 tahiti_golden_registers2[] =
362 {
363         0xc64, 0x00000001, 0x00000001
364 };
365
366 static const u32 pitcairn_golden_rlc_registers[] =
367 {
368         0xc424, 0xffffffff, 0x00601004,
369         0xc47c, 0xffffffff, 0x10102020,
370         0xc488, 0xffffffff, 0x01000020,
371         0xc314, 0xffffffff, 0x00000800,
372         0xc30c, 0xffffffff, 0x800000a4
373 };
374
375 static const u32 pitcairn_golden_registers[] =
376 {
377         0x9a10, 0x00010000, 0x00018208,
378         0x9830, 0xffffffff, 0x00000000,
379         0x9834, 0xf00fffff, 0x00000400,
380         0x9838, 0x0002021c, 0x00020200,
381         0xc78, 0x00000080, 0x00000000,
382         0xd030, 0x000300c0, 0x00800040,
383         0xd830, 0x000300c0, 0x00800040,
384         0x5bb0, 0x000000f0, 0x00000070,
385         0x5bc0, 0x00200000, 0x50100000,
386         0x7030, 0x31000311, 0x00000011,
387         0x2ae4, 0x00073ffe, 0x000022a2,
388         0x240c, 0x000007ff, 0x00000000,
389         0x8a14, 0xf000001f, 0x00000007,
390         0x8b24, 0xffffffff, 0x00ffffff,
391         0x8b10, 0x0000ff0f, 0x00000000,
392         0x28a4c, 0x07ffffff, 0x4e000000,
393         0x28350, 0x3f3f3fff, 0x2a00126a,
394         0x30, 0x000000ff, 0x0040,
395         0x34, 0x00000040, 0x00004040,
396         0x9100, 0x07ffffff, 0x03000000,
397         0x9060, 0x0000007f, 0x00000020,
398         0x9508, 0x00010000, 0x00010000,
399         0xac14, 0x000003ff, 0x000000f7,
400         0xac10, 0xffffffff, 0x00000000,
401         0xac0c, 0xffffffff, 0x32761054,
402         0x88d4, 0x0000001f, 0x00000010,
403         0x15c0, 0x000c0fc0, 0x000c0400
404 };
405
406 static const u32 verde_golden_rlc_registers[] =
407 {
408         0xc424, 0xffffffff, 0x033f1005,
409         0xc47c, 0xffffffff, 0x10808020,
410         0xc488, 0xffffffff, 0x00800008,
411         0xc314, 0xffffffff, 0x00001000,
412         0xc30c, 0xffffffff, 0x80010014
413 };
414
415 static const u32 verde_golden_registers[] =
416 {
417         0x9a10, 0x00010000, 0x00018208,
418         0x9830, 0xffffffff, 0x00000000,
419         0x9834, 0xf00fffff, 0x00000400,
420         0x9838, 0x0002021c, 0x00020200,
421         0xc78, 0x00000080, 0x00000000,
422         0xd030, 0x000300c0, 0x00800040,
423         0xd030, 0x000300c0, 0x00800040,
424         0xd830, 0x000300c0, 0x00800040,
425         0xd830, 0x000300c0, 0x00800040,
426         0x5bb0, 0x000000f0, 0x00000070,
427         0x5bc0, 0x00200000, 0x50100000,
428         0x7030, 0x31000311, 0x00000011,
429         0x2ae4, 0x00073ffe, 0x000022a2,
430         0x2ae4, 0x00073ffe, 0x000022a2,
431         0x2ae4, 0x00073ffe, 0x000022a2,
432         0x240c, 0x000007ff, 0x00000000,
433         0x240c, 0x000007ff, 0x00000000,
434         0x240c, 0x000007ff, 0x00000000,
435         0x8a14, 0xf000001f, 0x00000007,
436         0x8a14, 0xf000001f, 0x00000007,
437         0x8a14, 0xf000001f, 0x00000007,
438         0x8b24, 0xffffffff, 0x00ffffff,
439         0x8b10, 0x0000ff0f, 0x00000000,
440         0x28a4c, 0x07ffffff, 0x4e000000,
441         0x28350, 0x3f3f3fff, 0x0000124a,
442         0x28350, 0x3f3f3fff, 0x0000124a,
443         0x28350, 0x3f3f3fff, 0x0000124a,
444         0x30, 0x000000ff, 0x0040,
445         0x34, 0x00000040, 0x00004040,
446         0x9100, 0x07ffffff, 0x03000000,
447         0x9100, 0x07ffffff, 0x03000000,
448         0x8e88, 0x01ff1f3f, 0x00000000,
449         0x8e88, 0x01ff1f3f, 0x00000000,
450         0x8e88, 0x01ff1f3f, 0x00000000,
451         0x8e84, 0x01ff1f3f, 0x00000000,
452         0x8e84, 0x01ff1f3f, 0x00000000,
453         0x8e84, 0x01ff1f3f, 0x00000000,
454         0x9060, 0x0000007f, 0x00000020,
455         0x9508, 0x00010000, 0x00010000,
456         0xac14, 0x000003ff, 0x00000003,
457         0xac14, 0x000003ff, 0x00000003,
458         0xac14, 0x000003ff, 0x00000003,
459         0xac10, 0xffffffff, 0x00000000,
460         0xac10, 0xffffffff, 0x00000000,
461         0xac10, 0xffffffff, 0x00000000,
462         0xac0c, 0xffffffff, 0x00001032,
463         0xac0c, 0xffffffff, 0x00001032,
464         0xac0c, 0xffffffff, 0x00001032,
465         0x88d4, 0x0000001f, 0x00000010,
466         0x88d4, 0x0000001f, 0x00000010,
467         0x88d4, 0x0000001f, 0x00000010,
468         0x15c0, 0x000c0fc0, 0x000c0400
469 };
470
471 static const u32 oland_golden_rlc_registers[] =
472 {
473         0xc424, 0xffffffff, 0x00601005,
474         0xc47c, 0xffffffff, 0x10104040,
475         0xc488, 0xffffffff, 0x0100000a,
476         0xc314, 0xffffffff, 0x00000800,
477         0xc30c, 0xffffffff, 0x800000f4
478 };
479
480 static const u32 oland_golden_registers[] =
481 {
482         0x9a10, 0x00010000, 0x00018208,
483         0x9830, 0xffffffff, 0x00000000,
484         0x9834, 0xf00fffff, 0x00000400,
485         0x9838, 0x0002021c, 0x00020200,
486         0xc78, 0x00000080, 0x00000000,
487         0xd030, 0x000300c0, 0x00800040,
488         0xd830, 0x000300c0, 0x00800040,
489         0x5bb0, 0x000000f0, 0x00000070,
490         0x5bc0, 0x00200000, 0x50100000,
491         0x7030, 0x31000311, 0x00000011,
492         0x2ae4, 0x00073ffe, 0x000022a2,
493         0x240c, 0x000007ff, 0x00000000,
494         0x8a14, 0xf000001f, 0x00000007,
495         0x8b24, 0xffffffff, 0x00ffffff,
496         0x8b10, 0x0000ff0f, 0x00000000,
497         0x28a4c, 0x07ffffff, 0x4e000000,
498         0x28350, 0x3f3f3fff, 0x00000082,
499         0x30, 0x000000ff, 0x0040,
500         0x34, 0x00000040, 0x00004040,
501         0x9100, 0x07ffffff, 0x03000000,
502         0x9060, 0x0000007f, 0x00000020,
503         0x9508, 0x00010000, 0x00010000,
504         0xac14, 0x000003ff, 0x000000f3,
505         0xac10, 0xffffffff, 0x00000000,
506         0xac0c, 0xffffffff, 0x00003210,
507         0x88d4, 0x0000001f, 0x00000010,
508         0x15c0, 0x000c0fc0, 0x000c0400
509 };
510
511 static const u32 hainan_golden_registers[] =
512 {
513         0x9a10, 0x00010000, 0x00018208,
514         0x9830, 0xffffffff, 0x00000000,
515         0x9834, 0xf00fffff, 0x00000400,
516         0x9838, 0x0002021c, 0x00020200,
517         0xd0c0, 0xff000fff, 0x00000100,
518         0xd030, 0x000300c0, 0x00800040,
519         0xd8c0, 0xff000fff, 0x00000100,
520         0xd830, 0x000300c0, 0x00800040,
521         0x2ae4, 0x00073ffe, 0x000022a2,
522         0x240c, 0x000007ff, 0x00000000,
523         0x8a14, 0xf000001f, 0x00000007,
524         0x8b24, 0xffffffff, 0x00ffffff,
525         0x8b10, 0x0000ff0f, 0x00000000,
526         0x28a4c, 0x07ffffff, 0x4e000000,
527         0x28350, 0x3f3f3fff, 0x00000000,
528         0x30, 0x000000ff, 0x0040,
529         0x34, 0x00000040, 0x00004040,
530         0x9100, 0x03e00000, 0x03600000,
531         0x9060, 0x0000007f, 0x00000020,
532         0x9508, 0x00010000, 0x00010000,
533         0xac14, 0x000003ff, 0x000000f1,
534         0xac10, 0xffffffff, 0x00000000,
535         0xac0c, 0xffffffff, 0x00003210,
536         0x88d4, 0x0000001f, 0x00000010,
537         0x15c0, 0x000c0fc0, 0x000c0400
538 };
539
540 static const u32 hainan_golden_registers2[] =
541 {
542         0x98f8, 0xffffffff, 0x02010001
543 };
544
545 static const u32 tahiti_mgcg_cgcg_init[] =
546 {
547         0xc400, 0xffffffff, 0xfffffffc,
548         0x802c, 0xffffffff, 0xe0000000,
549         0x9a60, 0xffffffff, 0x00000100,
550         0x92a4, 0xffffffff, 0x00000100,
551         0xc164, 0xffffffff, 0x00000100,
552         0x9774, 0xffffffff, 0x00000100,
553         0x8984, 0xffffffff, 0x06000100,
554         0x8a18, 0xffffffff, 0x00000100,
555         0x92a0, 0xffffffff, 0x00000100,
556         0xc380, 0xffffffff, 0x00000100,
557         0x8b28, 0xffffffff, 0x00000100,
558         0x9144, 0xffffffff, 0x00000100,
559         0x8d88, 0xffffffff, 0x00000100,
560         0x8d8c, 0xffffffff, 0x00000100,
561         0x9030, 0xffffffff, 0x00000100,
562         0x9034, 0xffffffff, 0x00000100,
563         0x9038, 0xffffffff, 0x00000100,
564         0x903c, 0xffffffff, 0x00000100,
565         0xad80, 0xffffffff, 0x00000100,
566         0xac54, 0xffffffff, 0x00000100,
567         0x897c, 0xffffffff, 0x06000100,
568         0x9868, 0xffffffff, 0x00000100,
569         0x9510, 0xffffffff, 0x00000100,
570         0xaf04, 0xffffffff, 0x00000100,
571         0xae04, 0xffffffff, 0x00000100,
572         0x949c, 0xffffffff, 0x00000100,
573         0x802c, 0xffffffff, 0xe0000000,
574         0x9160, 0xffffffff, 0x00010000,
575         0x9164, 0xffffffff, 0x00030002,
576         0x9168, 0xffffffff, 0x00040007,
577         0x916c, 0xffffffff, 0x00060005,
578         0x9170, 0xffffffff, 0x00090008,
579         0x9174, 0xffffffff, 0x00020001,
580         0x9178, 0xffffffff, 0x00040003,
581         0x917c, 0xffffffff, 0x00000007,
582         0x9180, 0xffffffff, 0x00060005,
583         0x9184, 0xffffffff, 0x00090008,
584         0x9188, 0xffffffff, 0x00030002,
585         0x918c, 0xffffffff, 0x00050004,
586         0x9190, 0xffffffff, 0x00000008,
587         0x9194, 0xffffffff, 0x00070006,
588         0x9198, 0xffffffff, 0x000a0009,
589         0x919c, 0xffffffff, 0x00040003,
590         0x91a0, 0xffffffff, 0x00060005,
591         0x91a4, 0xffffffff, 0x00000009,
592         0x91a8, 0xffffffff, 0x00080007,
593         0x91ac, 0xffffffff, 0x000b000a,
594         0x91b0, 0xffffffff, 0x00050004,
595         0x91b4, 0xffffffff, 0x00070006,
596         0x91b8, 0xffffffff, 0x0008000b,
597         0x91bc, 0xffffffff, 0x000a0009,
598         0x91c0, 0xffffffff, 0x000d000c,
599         0x91c4, 0xffffffff, 0x00060005,
600         0x91c8, 0xffffffff, 0x00080007,
601         0x91cc, 0xffffffff, 0x0000000b,
602         0x91d0, 0xffffffff, 0x000a0009,
603         0x91d4, 0xffffffff, 0x000d000c,
604         0x91d8, 0xffffffff, 0x00070006,
605         0x91dc, 0xffffffff, 0x00090008,
606         0x91e0, 0xffffffff, 0x0000000c,
607         0x91e4, 0xffffffff, 0x000b000a,
608         0x91e8, 0xffffffff, 0x000e000d,
609         0x91ec, 0xffffffff, 0x00080007,
610         0x91f0, 0xffffffff, 0x000a0009,
611         0x91f4, 0xffffffff, 0x0000000d,
612         0x91f8, 0xffffffff, 0x000c000b,
613         0x91fc, 0xffffffff, 0x000f000e,
614         0x9200, 0xffffffff, 0x00090008,
615         0x9204, 0xffffffff, 0x000b000a,
616         0x9208, 0xffffffff, 0x000c000f,
617         0x920c, 0xffffffff, 0x000e000d,
618         0x9210, 0xffffffff, 0x00110010,
619         0x9214, 0xffffffff, 0x000a0009,
620         0x9218, 0xffffffff, 0x000c000b,
621         0x921c, 0xffffffff, 0x0000000f,
622         0x9220, 0xffffffff, 0x000e000d,
623         0x9224, 0xffffffff, 0x00110010,
624         0x9228, 0xffffffff, 0x000b000a,
625         0x922c, 0xffffffff, 0x000d000c,
626         0x9230, 0xffffffff, 0x00000010,
627         0x9234, 0xffffffff, 0x000f000e,
628         0x9238, 0xffffffff, 0x00120011,
629         0x923c, 0xffffffff, 0x000c000b,
630         0x9240, 0xffffffff, 0x000e000d,
631         0x9244, 0xffffffff, 0x00000011,
632         0x9248, 0xffffffff, 0x0010000f,
633         0x924c, 0xffffffff, 0x00130012,
634         0x9250, 0xffffffff, 0x000d000c,
635         0x9254, 0xffffffff, 0x000f000e,
636         0x9258, 0xffffffff, 0x00100013,
637         0x925c, 0xffffffff, 0x00120011,
638         0x9260, 0xffffffff, 0x00150014,
639         0x9264, 0xffffffff, 0x000e000d,
640         0x9268, 0xffffffff, 0x0010000f,
641         0x926c, 0xffffffff, 0x00000013,
642         0x9270, 0xffffffff, 0x00120011,
643         0x9274, 0xffffffff, 0x00150014,
644         0x9278, 0xffffffff, 0x000f000e,
645         0x927c, 0xffffffff, 0x00110010,
646         0x9280, 0xffffffff, 0x00000014,
647         0x9284, 0xffffffff, 0x00130012,
648         0x9288, 0xffffffff, 0x00160015,
649         0x928c, 0xffffffff, 0x0010000f,
650         0x9290, 0xffffffff, 0x00120011,
651         0x9294, 0xffffffff, 0x00000015,
652         0x9298, 0xffffffff, 0x00140013,
653         0x929c, 0xffffffff, 0x00170016,
654         0x9150, 0xffffffff, 0x96940200,
655         0x8708, 0xffffffff, 0x00900100,
656         0xc478, 0xffffffff, 0x00000080,
657         0xc404, 0xffffffff, 0x0020003f,
658         0x30, 0xffffffff, 0x0000001c,
659         0x34, 0x000f0000, 0x000f0000,
660         0x160c, 0xffffffff, 0x00000100,
661         0x1024, 0xffffffff, 0x00000100,
662         0x102c, 0x00000101, 0x00000000,
663         0x20a8, 0xffffffff, 0x00000104,
664         0x264c, 0x000c0000, 0x000c0000,
665         0x2648, 0x000c0000, 0x000c0000,
666         0x55e4, 0xff000fff, 0x00000100,
667         0x55e8, 0x00000001, 0x00000001,
668         0x2f50, 0x00000001, 0x00000001,
669         0x30cc, 0xc0000fff, 0x00000104,
670         0xc1e4, 0x00000001, 0x00000001,
671         0xd0c0, 0xfffffff0, 0x00000100,
672         0xd8c0, 0xfffffff0, 0x00000100
673 };
674
675 static const u32 pitcairn_mgcg_cgcg_init[] =
676 {
677         0xc400, 0xffffffff, 0xfffffffc,
678         0x802c, 0xffffffff, 0xe0000000,
679         0x9a60, 0xffffffff, 0x00000100,
680         0x92a4, 0xffffffff, 0x00000100,
681         0xc164, 0xffffffff, 0x00000100,
682         0x9774, 0xffffffff, 0x00000100,
683         0x8984, 0xffffffff, 0x06000100,
684         0x8a18, 0xffffffff, 0x00000100,
685         0x92a0, 0xffffffff, 0x00000100,
686         0xc380, 0xffffffff, 0x00000100,
687         0x8b28, 0xffffffff, 0x00000100,
688         0x9144, 0xffffffff, 0x00000100,
689         0x8d88, 0xffffffff, 0x00000100,
690         0x8d8c, 0xffffffff, 0x00000100,
691         0x9030, 0xffffffff, 0x00000100,
692         0x9034, 0xffffffff, 0x00000100,
693         0x9038, 0xffffffff, 0x00000100,
694         0x903c, 0xffffffff, 0x00000100,
695         0xad80, 0xffffffff, 0x00000100,
696         0xac54, 0xffffffff, 0x00000100,
697         0x897c, 0xffffffff, 0x06000100,
698         0x9868, 0xffffffff, 0x00000100,
699         0x9510, 0xffffffff, 0x00000100,
700         0xaf04, 0xffffffff, 0x00000100,
701         0xae04, 0xffffffff, 0x00000100,
702         0x949c, 0xffffffff, 0x00000100,
703         0x802c, 0xffffffff, 0xe0000000,
704         0x9160, 0xffffffff, 0x00010000,
705         0x9164, 0xffffffff, 0x00030002,
706         0x9168, 0xffffffff, 0x00040007,
707         0x916c, 0xffffffff, 0x00060005,
708         0x9170, 0xffffffff, 0x00090008,
709         0x9174, 0xffffffff, 0x00020001,
710         0x9178, 0xffffffff, 0x00040003,
711         0x917c, 0xffffffff, 0x00000007,
712         0x9180, 0xffffffff, 0x00060005,
713         0x9184, 0xffffffff, 0x00090008,
714         0x9188, 0xffffffff, 0x00030002,
715         0x918c, 0xffffffff, 0x00050004,
716         0x9190, 0xffffffff, 0x00000008,
717         0x9194, 0xffffffff, 0x00070006,
718         0x9198, 0xffffffff, 0x000a0009,
719         0x919c, 0xffffffff, 0x00040003,
720         0x91a0, 0xffffffff, 0x00060005,
721         0x91a4, 0xffffffff, 0x00000009,
722         0x91a8, 0xffffffff, 0x00080007,
723         0x91ac, 0xffffffff, 0x000b000a,
724         0x91b0, 0xffffffff, 0x00050004,
725         0x91b4, 0xffffffff, 0x00070006,
726         0x91b8, 0xffffffff, 0x0008000b,
727         0x91bc, 0xffffffff, 0x000a0009,
728         0x91c0, 0xffffffff, 0x000d000c,
729         0x9200, 0xffffffff, 0x00090008,
730         0x9204, 0xffffffff, 0x000b000a,
731         0x9208, 0xffffffff, 0x000c000f,
732         0x920c, 0xffffffff, 0x000e000d,
733         0x9210, 0xffffffff, 0x00110010,
734         0x9214, 0xffffffff, 0x000a0009,
735         0x9218, 0xffffffff, 0x000c000b,
736         0x921c, 0xffffffff, 0x0000000f,
737         0x9220, 0xffffffff, 0x000e000d,
738         0x9224, 0xffffffff, 0x00110010,
739         0x9228, 0xffffffff, 0x000b000a,
740         0x922c, 0xffffffff, 0x000d000c,
741         0x9230, 0xffffffff, 0x00000010,
742         0x9234, 0xffffffff, 0x000f000e,
743         0x9238, 0xffffffff, 0x00120011,
744         0x923c, 0xffffffff, 0x000c000b,
745         0x9240, 0xffffffff, 0x000e000d,
746         0x9244, 0xffffffff, 0x00000011,
747         0x9248, 0xffffffff, 0x0010000f,
748         0x924c, 0xffffffff, 0x00130012,
749         0x9250, 0xffffffff, 0x000d000c,
750         0x9254, 0xffffffff, 0x000f000e,
751         0x9258, 0xffffffff, 0x00100013,
752         0x925c, 0xffffffff, 0x00120011,
753         0x9260, 0xffffffff, 0x00150014,
754         0x9150, 0xffffffff, 0x96940200,
755         0x8708, 0xffffffff, 0x00900100,
756         0xc478, 0xffffffff, 0x00000080,
757         0xc404, 0xffffffff, 0x0020003f,
758         0x30, 0xffffffff, 0x0000001c,
759         0x34, 0x000f0000, 0x000f0000,
760         0x160c, 0xffffffff, 0x00000100,
761         0x1024, 0xffffffff, 0x00000100,
762         0x102c, 0x00000101, 0x00000000,
763         0x20a8, 0xffffffff, 0x00000104,
764         0x55e4, 0xff000fff, 0x00000100,
765         0x55e8, 0x00000001, 0x00000001,
766         0x2f50, 0x00000001, 0x00000001,
767         0x30cc, 0xc0000fff, 0x00000104,
768         0xc1e4, 0x00000001, 0x00000001,
769         0xd0c0, 0xfffffff0, 0x00000100,
770         0xd8c0, 0xfffffff0, 0x00000100
771 };
772
773 static const u32 verde_mgcg_cgcg_init[] =
774 {
775         0xc400, 0xffffffff, 0xfffffffc,
776         0x802c, 0xffffffff, 0xe0000000,
777         0x9a60, 0xffffffff, 0x00000100,
778         0x92a4, 0xffffffff, 0x00000100,
779         0xc164, 0xffffffff, 0x00000100,
780         0x9774, 0xffffffff, 0x00000100,
781         0x8984, 0xffffffff, 0x06000100,
782         0x8a18, 0xffffffff, 0x00000100,
783         0x92a0, 0xffffffff, 0x00000100,
784         0xc380, 0xffffffff, 0x00000100,
785         0x8b28, 0xffffffff, 0x00000100,
786         0x9144, 0xffffffff, 0x00000100,
787         0x8d88, 0xffffffff, 0x00000100,
788         0x8d8c, 0xffffffff, 0x00000100,
789         0x9030, 0xffffffff, 0x00000100,
790         0x9034, 0xffffffff, 0x00000100,
791         0x9038, 0xffffffff, 0x00000100,
792         0x903c, 0xffffffff, 0x00000100,
793         0xad80, 0xffffffff, 0x00000100,
794         0xac54, 0xffffffff, 0x00000100,
795         0x897c, 0xffffffff, 0x06000100,
796         0x9868, 0xffffffff, 0x00000100,
797         0x9510, 0xffffffff, 0x00000100,
798         0xaf04, 0xffffffff, 0x00000100,
799         0xae04, 0xffffffff, 0x00000100,
800         0x949c, 0xffffffff, 0x00000100,
801         0x802c, 0xffffffff, 0xe0000000,
802         0x9160, 0xffffffff, 0x00010000,
803         0x9164, 0xffffffff, 0x00030002,
804         0x9168, 0xffffffff, 0x00040007,
805         0x916c, 0xffffffff, 0x00060005,
806         0x9170, 0xffffffff, 0x00090008,
807         0x9174, 0xffffffff, 0x00020001,
808         0x9178, 0xffffffff, 0x00040003,
809         0x917c, 0xffffffff, 0x00000007,
810         0x9180, 0xffffffff, 0x00060005,
811         0x9184, 0xffffffff, 0x00090008,
812         0x9188, 0xffffffff, 0x00030002,
813         0x918c, 0xffffffff, 0x00050004,
814         0x9190, 0xffffffff, 0x00000008,
815         0x9194, 0xffffffff, 0x00070006,
816         0x9198, 0xffffffff, 0x000a0009,
817         0x919c, 0xffffffff, 0x00040003,
818         0x91a0, 0xffffffff, 0x00060005,
819         0x91a4, 0xffffffff, 0x00000009,
820         0x91a8, 0xffffffff, 0x00080007,
821         0x91ac, 0xffffffff, 0x000b000a,
822         0x91b0, 0xffffffff, 0x00050004,
823         0x91b4, 0xffffffff, 0x00070006,
824         0x91b8, 0xffffffff, 0x0008000b,
825         0x91bc, 0xffffffff, 0x000a0009,
826         0x91c0, 0xffffffff, 0x000d000c,
827         0x9200, 0xffffffff, 0x00090008,
828         0x9204, 0xffffffff, 0x000b000a,
829         0x9208, 0xffffffff, 0x000c000f,
830         0x920c, 0xffffffff, 0x000e000d,
831         0x9210, 0xffffffff, 0x00110010,
832         0x9214, 0xffffffff, 0x000a0009,
833         0x9218, 0xffffffff, 0x000c000b,
834         0x921c, 0xffffffff, 0x0000000f,
835         0x9220, 0xffffffff, 0x000e000d,
836         0x9224, 0xffffffff, 0x00110010,
837         0x9228, 0xffffffff, 0x000b000a,
838         0x922c, 0xffffffff, 0x000d000c,
839         0x9230, 0xffffffff, 0x00000010,
840         0x9234, 0xffffffff, 0x000f000e,
841         0x9238, 0xffffffff, 0x00120011,
842         0x923c, 0xffffffff, 0x000c000b,
843         0x9240, 0xffffffff, 0x000e000d,
844         0x9244, 0xffffffff, 0x00000011,
845         0x9248, 0xffffffff, 0x0010000f,
846         0x924c, 0xffffffff, 0x00130012,
847         0x9250, 0xffffffff, 0x000d000c,
848         0x9254, 0xffffffff, 0x000f000e,
849         0x9258, 0xffffffff, 0x00100013,
850         0x925c, 0xffffffff, 0x00120011,
851         0x9260, 0xffffffff, 0x00150014,
852         0x9150, 0xffffffff, 0x96940200,
853         0x8708, 0xffffffff, 0x00900100,
854         0xc478, 0xffffffff, 0x00000080,
855         0xc404, 0xffffffff, 0x0020003f,
856         0x30, 0xffffffff, 0x0000001c,
857         0x34, 0x000f0000, 0x000f0000,
858         0x160c, 0xffffffff, 0x00000100,
859         0x1024, 0xffffffff, 0x00000100,
860         0x102c, 0x00000101, 0x00000000,
861         0x20a8, 0xffffffff, 0x00000104,
862         0x264c, 0x000c0000, 0x000c0000,
863         0x2648, 0x000c0000, 0x000c0000,
864         0x55e4, 0xff000fff, 0x00000100,
865         0x55e8, 0x00000001, 0x00000001,
866         0x2f50, 0x00000001, 0x00000001,
867         0x30cc, 0xc0000fff, 0x00000104,
868         0xc1e4, 0x00000001, 0x00000001,
869         0xd0c0, 0xfffffff0, 0x00000100,
870         0xd8c0, 0xfffffff0, 0x00000100
871 };
872
873 static const u32 oland_mgcg_cgcg_init[] =
874 {
875         0xc400, 0xffffffff, 0xfffffffc,
876         0x802c, 0xffffffff, 0xe0000000,
877         0x9a60, 0xffffffff, 0x00000100,
878         0x92a4, 0xffffffff, 0x00000100,
879         0xc164, 0xffffffff, 0x00000100,
880         0x9774, 0xffffffff, 0x00000100,
881         0x8984, 0xffffffff, 0x06000100,
882         0x8a18, 0xffffffff, 0x00000100,
883         0x92a0, 0xffffffff, 0x00000100,
884         0xc380, 0xffffffff, 0x00000100,
885         0x8b28, 0xffffffff, 0x00000100,
886         0x9144, 0xffffffff, 0x00000100,
887         0x8d88, 0xffffffff, 0x00000100,
888         0x8d8c, 0xffffffff, 0x00000100,
889         0x9030, 0xffffffff, 0x00000100,
890         0x9034, 0xffffffff, 0x00000100,
891         0x9038, 0xffffffff, 0x00000100,
892         0x903c, 0xffffffff, 0x00000100,
893         0xad80, 0xffffffff, 0x00000100,
894         0xac54, 0xffffffff, 0x00000100,
895         0x897c, 0xffffffff, 0x06000100,
896         0x9868, 0xffffffff, 0x00000100,
897         0x9510, 0xffffffff, 0x00000100,
898         0xaf04, 0xffffffff, 0x00000100,
899         0xae04, 0xffffffff, 0x00000100,
900         0x949c, 0xffffffff, 0x00000100,
901         0x802c, 0xffffffff, 0xe0000000,
902         0x9160, 0xffffffff, 0x00010000,
903         0x9164, 0xffffffff, 0x00030002,
904         0x9168, 0xffffffff, 0x00040007,
905         0x916c, 0xffffffff, 0x00060005,
906         0x9170, 0xffffffff, 0x00090008,
907         0x9174, 0xffffffff, 0x00020001,
908         0x9178, 0xffffffff, 0x00040003,
909         0x917c, 0xffffffff, 0x00000007,
910         0x9180, 0xffffffff, 0x00060005,
911         0x9184, 0xffffffff, 0x00090008,
912         0x9188, 0xffffffff, 0x00030002,
913         0x918c, 0xffffffff, 0x00050004,
914         0x9190, 0xffffffff, 0x00000008,
915         0x9194, 0xffffffff, 0x00070006,
916         0x9198, 0xffffffff, 0x000a0009,
917         0x919c, 0xffffffff, 0x00040003,
918         0x91a0, 0xffffffff, 0x00060005,
919         0x91a4, 0xffffffff, 0x00000009,
920         0x91a8, 0xffffffff, 0x00080007,
921         0x91ac, 0xffffffff, 0x000b000a,
922         0x91b0, 0xffffffff, 0x00050004,
923         0x91b4, 0xffffffff, 0x00070006,
924         0x91b8, 0xffffffff, 0x0008000b,
925         0x91bc, 0xffffffff, 0x000a0009,
926         0x91c0, 0xffffffff, 0x000d000c,
927         0x91c4, 0xffffffff, 0x00060005,
928         0x91c8, 0xffffffff, 0x00080007,
929         0x91cc, 0xffffffff, 0x0000000b,
930         0x91d0, 0xffffffff, 0x000a0009,
931         0x91d4, 0xffffffff, 0x000d000c,
932         0x9150, 0xffffffff, 0x96940200,
933         0x8708, 0xffffffff, 0x00900100,
934         0xc478, 0xffffffff, 0x00000080,
935         0xc404, 0xffffffff, 0x0020003f,
936         0x30, 0xffffffff, 0x0000001c,
937         0x34, 0x000f0000, 0x000f0000,
938         0x160c, 0xffffffff, 0x00000100,
939         0x1024, 0xffffffff, 0x00000100,
940         0x102c, 0x00000101, 0x00000000,
941         0x20a8, 0xffffffff, 0x00000104,
942         0x264c, 0x000c0000, 0x000c0000,
943         0x2648, 0x000c0000, 0x000c0000,
944         0x55e4, 0xff000fff, 0x00000100,
945         0x55e8, 0x00000001, 0x00000001,
946         0x2f50, 0x00000001, 0x00000001,
947         0x30cc, 0xc0000fff, 0x00000104,
948         0xc1e4, 0x00000001, 0x00000001,
949         0xd0c0, 0xfffffff0, 0x00000100,
950         0xd8c0, 0xfffffff0, 0x00000100
951 };
952
953 static const u32 hainan_mgcg_cgcg_init[] =
954 {
955         0xc400, 0xffffffff, 0xfffffffc,
956         0x802c, 0xffffffff, 0xe0000000,
957         0x9a60, 0xffffffff, 0x00000100,
958         0x92a4, 0xffffffff, 0x00000100,
959         0xc164, 0xffffffff, 0x00000100,
960         0x9774, 0xffffffff, 0x00000100,
961         0x8984, 0xffffffff, 0x06000100,
962         0x8a18, 0xffffffff, 0x00000100,
963         0x92a0, 0xffffffff, 0x00000100,
964         0xc380, 0xffffffff, 0x00000100,
965         0x8b28, 0xffffffff, 0x00000100,
966         0x9144, 0xffffffff, 0x00000100,
967         0x8d88, 0xffffffff, 0x00000100,
968         0x8d8c, 0xffffffff, 0x00000100,
969         0x9030, 0xffffffff, 0x00000100,
970         0x9034, 0xffffffff, 0x00000100,
971         0x9038, 0xffffffff, 0x00000100,
972         0x903c, 0xffffffff, 0x00000100,
973         0xad80, 0xffffffff, 0x00000100,
974         0xac54, 0xffffffff, 0x00000100,
975         0x897c, 0xffffffff, 0x06000100,
976         0x9868, 0xffffffff, 0x00000100,
977         0x9510, 0xffffffff, 0x00000100,
978         0xaf04, 0xffffffff, 0x00000100,
979         0xae04, 0xffffffff, 0x00000100,
980         0x949c, 0xffffffff, 0x00000100,
981         0x802c, 0xffffffff, 0xe0000000,
982         0x9160, 0xffffffff, 0x00010000,
983         0x9164, 0xffffffff, 0x00030002,
984         0x9168, 0xffffffff, 0x00040007,
985         0x916c, 0xffffffff, 0x00060005,
986         0x9170, 0xffffffff, 0x00090008,
987         0x9174, 0xffffffff, 0x00020001,
988         0x9178, 0xffffffff, 0x00040003,
989         0x917c, 0xffffffff, 0x00000007,
990         0x9180, 0xffffffff, 0x00060005,
991         0x9184, 0xffffffff, 0x00090008,
992         0x9188, 0xffffffff, 0x00030002,
993         0x918c, 0xffffffff, 0x00050004,
994         0x9190, 0xffffffff, 0x00000008,
995         0x9194, 0xffffffff, 0x00070006,
996         0x9198, 0xffffffff, 0x000a0009,
997         0x919c, 0xffffffff, 0x00040003,
998         0x91a0, 0xffffffff, 0x00060005,
999         0x91a4, 0xffffffff, 0x00000009,
1000         0x91a8, 0xffffffff, 0x00080007,
1001         0x91ac, 0xffffffff, 0x000b000a,
1002         0x91b0, 0xffffffff, 0x00050004,
1003         0x91b4, 0xffffffff, 0x00070006,
1004         0x91b8, 0xffffffff, 0x0008000b,
1005         0x91bc, 0xffffffff, 0x000a0009,
1006         0x91c0, 0xffffffff, 0x000d000c,
1007         0x91c4, 0xffffffff, 0x00060005,
1008         0x91c8, 0xffffffff, 0x00080007,
1009         0x91cc, 0xffffffff, 0x0000000b,
1010         0x91d0, 0xffffffff, 0x000a0009,
1011         0x91d4, 0xffffffff, 0x000d000c,
1012         0x9150, 0xffffffff, 0x96940200,
1013         0x8708, 0xffffffff, 0x00900100,
1014         0xc478, 0xffffffff, 0x00000080,
1015         0xc404, 0xffffffff, 0x0020003f,
1016         0x30, 0xffffffff, 0x0000001c,
1017         0x34, 0x000f0000, 0x000f0000,
1018         0x160c, 0xffffffff, 0x00000100,
1019         0x1024, 0xffffffff, 0x00000100,
1020         0x20a8, 0xffffffff, 0x00000104,
1021         0x264c, 0x000c0000, 0x000c0000,
1022         0x2648, 0x000c0000, 0x000c0000,
1023         0x2f50, 0x00000001, 0x00000001,
1024         0x30cc, 0xc0000fff, 0x00000104,
1025         0xc1e4, 0x00000001, 0x00000001,
1026         0xd0c0, 0xfffffff0, 0x00000100,
1027         0xd8c0, 0xfffffff0, 0x00000100
1028 };
1029
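/* Power gating init sequence for VERDE, in the same { offset, mask, value } format. */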
1030 static const u32 verde_pg_init[] =
1031 {
1032         0x353c, 0xffffffff, 0x40000,
1033         0x3538, 0xffffffff, 0x200010ff,
1034         0x353c, 0xffffffff, 0x0,
1035         0x353c, 0xffffffff, 0x0,
1036         0x353c, 0xffffffff, 0x0,
1037         0x353c, 0xffffffff, 0x0,
1038         0x353c, 0xffffffff, 0x0,
1039         0x353c, 0xffffffff, 0x7007,
1040         0x3538, 0xffffffff, 0x300010ff,
1041         0x353c, 0xffffffff, 0x0,
1042         0x353c, 0xffffffff, 0x0,
1043         0x353c, 0xffffffff, 0x0,
1044         0x353c, 0xffffffff, 0x0,
1045         0x353c, 0xffffffff, 0x0,
1046         0x353c, 0xffffffff, 0x400000,
1047         0x3538, 0xffffffff, 0x100010ff,
1048         0x353c, 0xffffffff, 0x0,
1049         0x353c, 0xffffffff, 0x0,
1050         0x353c, 0xffffffff, 0x0,
1051         0x353c, 0xffffffff, 0x0,
1052         0x353c, 0xffffffff, 0x0,
1053         0x353c, 0xffffffff, 0x120200,
1054         0x3538, 0xffffffff, 0x500010ff,
1055         0x353c, 0xffffffff, 0x0,
1056         0x353c, 0xffffffff, 0x0,
1057         0x353c, 0xffffffff, 0x0,
1058         0x353c, 0xffffffff, 0x0,
1059         0x353c, 0xffffffff, 0x0,
1060         0x353c, 0xffffffff, 0x1e1e16,
1061         0x3538, 0xffffffff, 0x600010ff,
1062         0x353c, 0xffffffff, 0x0,
1063         0x353c, 0xffffffff, 0x0,
1064         0x353c, 0xffffffff, 0x0,
1065         0x353c, 0xffffffff, 0x0,
1066         0x353c, 0xffffffff, 0x0,
1067         0x353c, 0xffffffff, 0x171f1e,
1068         0x3538, 0xffffffff, 0x700010ff,
1069         0x353c, 0xffffffff, 0x0,
1070         0x353c, 0xffffffff, 0x0,
1071         0x353c, 0xffffffff, 0x0,
1072         0x353c, 0xffffffff, 0x0,
1073         0x353c, 0xffffffff, 0x0,
1074         0x353c, 0xffffffff, 0x0,
1075         0x3538, 0xffffffff, 0x9ff,
1076         0x3500, 0xffffffff, 0x0,
1077         0x3504, 0xffffffff, 0x10000800,
1078         0x3504, 0xffffffff, 0xf,
1079         0x3504, 0xffffffff, 0xf,
1080         0x3500, 0xffffffff, 0x4,
1081         0x3504, 0xffffffff, 0x1000051e,
1082         0x3504, 0xffffffff, 0xffff,
1083         0x3504, 0xffffffff, 0xffff,
1084         0x3500, 0xffffffff, 0x8,
1085         0x3504, 0xffffffff, 0x80500,
1086         0x3500, 0xffffffff, 0x12,
1087         0x3504, 0xffffffff, 0x9050c,
1088         0x3500, 0xffffffff, 0x1d,
1089         0x3504, 0xffffffff, 0xb052c,
1090         0x3500, 0xffffffff, 0x2a,
1091         0x3504, 0xffffffff, 0x1053e,
1092         0x3500, 0xffffffff, 0x2d,
1093         0x3504, 0xffffffff, 0x10546,
1094         0x3500, 0xffffffff, 0x30,
1095         0x3504, 0xffffffff, 0xa054e,
1096         0x3500, 0xffffffff, 0x3c,
1097         0x3504, 0xffffffff, 0x1055f,
1098         0x3500, 0xffffffff, 0x3f,
1099         0x3504, 0xffffffff, 0x10567,
1100         0x3500, 0xffffffff, 0x42,
1101         0x3504, 0xffffffff, 0x1056f,
1102         0x3500, 0xffffffff, 0x45,
1103         0x3504, 0xffffffff, 0x10572,
1104         0x3500, 0xffffffff, 0x48,
1105         0x3504, 0xffffffff, 0x20575,
1106         0x3500, 0xffffffff, 0x4c,
1107         0x3504, 0xffffffff, 0x190801,
1108         0x3500, 0xffffffff, 0x67,
1109         0x3504, 0xffffffff, 0x1082a,
1110         0x3500, 0xffffffff, 0x6a,
1111         0x3504, 0xffffffff, 0x1b082d,
1112         0x3500, 0xffffffff, 0x87,
1113         0x3504, 0xffffffff, 0x310851,
1114         0x3500, 0xffffffff, 0xba,
1115         0x3504, 0xffffffff, 0x891,
1116         0x3500, 0xffffffff, 0xbc,
1117         0x3504, 0xffffffff, 0x893,
1118         0x3500, 0xffffffff, 0xbe,
1119         0x3504, 0xffffffff, 0x20895,
1120         0x3500, 0xffffffff, 0xc2,
1121         0x3504, 0xffffffff, 0x20899,
1122         0x3500, 0xffffffff, 0xc6,
1123         0x3504, 0xffffffff, 0x2089d,
1124         0x3500, 0xffffffff, 0xca,
1125         0x3504, 0xffffffff, 0x8a1,
1126         0x3500, 0xffffffff, 0xcc,
1127         0x3504, 0xffffffff, 0x8a3,
1128         0x3500, 0xffffffff, 0xce,
1129         0x3504, 0xffffffff, 0x308a5,
1130         0x3500, 0xffffffff, 0xd3,
1131         0x3504, 0xffffffff, 0x6d08cd,
1132         0x3500, 0xffffffff, 0x142,
1133         0x3504, 0xffffffff, 0x2000095a,
1134         0x3504, 0xffffffff, 0x1,
1135         0x3500, 0xffffffff, 0x144,
1136         0x3504, 0xffffffff, 0x301f095b,
1137         0x3500, 0xffffffff, 0x165,
1138         0x3504, 0xffffffff, 0xc094d,
1139         0x3500, 0xffffffff, 0x173,
1140         0x3504, 0xffffffff, 0xf096d,
1141         0x3500, 0xffffffff, 0x184,
1142         0x3504, 0xffffffff, 0x15097f,
1143         0x3500, 0xffffffff, 0x19b,
1144         0x3504, 0xffffffff, 0xc0998,
1145         0x3500, 0xffffffff, 0x1a9,
1146         0x3504, 0xffffffff, 0x409a7,
1147         0x3500, 0xffffffff, 0x1af,
1148         0x3504, 0xffffffff, 0xcdc,
1149         0x3500, 0xffffffff, 0x1b1,
1150         0x3504, 0xffffffff, 0x800,
1151         0x3508, 0xffffffff, 0x6c9b2000,
1152         0x3510, 0xfc00, 0x2000,
1153         0x3544, 0xffffffff, 0xfc0,
1154         0x28d4, 0x00000100, 0x100
1155 };
1156
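/* Apply the per-ASIC golden register settings above during hardware init. */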
1157 static void si_init_golden_registers(struct radeon_device *rdev)
1158 {
1159         switch (rdev->family) {
1160         case CHIP_TAHITI:
1161                 radeon_program_register_sequence(rdev,
1162                                                  tahiti_golden_registers,
1163                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
1164                 radeon_program_register_sequence(rdev,
1165                                                  tahiti_golden_rlc_registers,
1166                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1167                 radeon_program_register_sequence(rdev,
1168                                                  tahiti_mgcg_cgcg_init,
1169                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1170                 radeon_program_register_sequence(rdev,
1171                                                  tahiti_golden_registers2,
1172                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1173                 break;
1174         case CHIP_PITCAIRN:
1175                 radeon_program_register_sequence(rdev,
1176                                                  pitcairn_golden_registers,
1177                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1178                 radeon_program_register_sequence(rdev,
1179                                                  pitcairn_golden_rlc_registers,
1180                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1181                 radeon_program_register_sequence(rdev,
1182                                                  pitcairn_mgcg_cgcg_init,
1183                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1184                 break;
1185         case CHIP_VERDE:
1186                 radeon_program_register_sequence(rdev,
1187                                                  verde_golden_registers,
1188                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
1189                 radeon_program_register_sequence(rdev,
1190                                                  verde_golden_rlc_registers,
1191                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1192                 radeon_program_register_sequence(rdev,
1193                                                  verde_mgcg_cgcg_init,
1194                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1195                 radeon_program_register_sequence(rdev,
1196                                                  verde_pg_init,
1197                                                  (const u32)ARRAY_SIZE(verde_pg_init));
1198                 break;
1199         case CHIP_OLAND:
1200                 radeon_program_register_sequence(rdev,
1201                                                  oland_golden_registers,
1202                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
1203                 radeon_program_register_sequence(rdev,
1204                                                  oland_golden_rlc_registers,
1205                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1206                 radeon_program_register_sequence(rdev,
1207                                                  oland_mgcg_cgcg_init,
1208                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1209                 break;
1210         case CHIP_HAINAN:
1211                 radeon_program_register_sequence(rdev,
1212                                                  hainan_golden_registers,
1213                                                  (const u32)ARRAY_SIZE(hainan_golden_registers));
1214                 radeon_program_register_sequence(rdev,
1215                                                  hainan_golden_registers2,
1216                                                  (const u32)ARRAY_SIZE(hainan_golden_registers2));
1217                 radeon_program_register_sequence(rdev,
1218                                                  hainan_mgcg_cgcg_init,
1219                                                  (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1220                 break;
1221         default:
1222                 break;
1223         }
1224 }
1225
1226 #define PCIE_BUS_CLK                10000
1227 #define TCLK                        (PCIE_BUS_CLK / 10)
1228
1229 /**
1230  * si_get_xclk - get the xclk
1231  *
1232  * @rdev: radeon_device pointer
1233  *
1234  * Returns the reference clock used by the gfx engine
1235  * (SI).
1236  */
1237 u32 si_get_xclk(struct radeon_device *rdev)
1238 {
1239         u32 reference_clock = rdev->clock.spll.reference_freq;
1240         u32 tmp;
1241
1242         tmp = RREG32(CG_CLKPIN_CNTL_2);
1243         if (tmp & MUX_TCLK_TO_XCLK)
1244                 return TCLK;
1245
1246         tmp = RREG32(CG_CLKPIN_CNTL);
1247         if (tmp & XTALIN_DIVIDE)
1248                 return reference_clock / 4;
1249
1250         return reference_clock;
1251 }
1252
1253 /* get temperature in millidegrees */
1254 int si_get_temp(struct radeon_device *rdev)
1255 {
1256         u32 temp;
1257         int actual_temp = 0;
1258
1259         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1260                 CTF_TEMP_SHIFT;
1261
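        /* readings with bit 9 set are clamped to the 255 C maximum */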
1262         if (temp & 0x200)
1263                 actual_temp = 255;
1264         else
1265                 actual_temp = temp & 0x1ff;
1266
1267         actual_temp = (actual_temp * 1000);
1268
1269         return actual_temp;
1270 }
1271
1272 #define TAHITI_IO_MC_REGS_SIZE 36
1273
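/*
 * MC register tables: { MC_SEQ_IO_DEBUG index, value } pairs programmed via
 * MC_SEQ_IO_DEBUG_INDEX/DATA while loading the MC microcode.
 */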
1274 static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1275         {0x0000006f, 0x03044000},
1276         {0x00000070, 0x0480c018},
1277         {0x00000071, 0x00000040},
1278         {0x00000072, 0x01000000},
1279         {0x00000074, 0x000000ff},
1280         {0x00000075, 0x00143400},
1281         {0x00000076, 0x08ec0800},
1282         {0x00000077, 0x040000cc},
1283         {0x00000079, 0x00000000},
1284         {0x0000007a, 0x21000409},
1285         {0x0000007c, 0x00000000},
1286         {0x0000007d, 0xe8000000},
1287         {0x0000007e, 0x044408a8},
1288         {0x0000007f, 0x00000003},
1289         {0x00000080, 0x00000000},
1290         {0x00000081, 0x01000000},
1291         {0x00000082, 0x02000000},
1292         {0x00000083, 0x00000000},
1293         {0x00000084, 0xe3f3e4f4},
1294         {0x00000085, 0x00052024},
1295         {0x00000087, 0x00000000},
1296         {0x00000088, 0x66036603},
1297         {0x00000089, 0x01000000},
1298         {0x0000008b, 0x1c0a0000},
1299         {0x0000008c, 0xff010000},
1300         {0x0000008e, 0xffffefff},
1301         {0x0000008f, 0xfff3efff},
1302         {0x00000090, 0xfff3efbf},
1303         {0x00000094, 0x00101101},
1304         {0x00000095, 0x00000fff},
1305         {0x00000096, 0x00116fff},
1306         {0x00000097, 0x60010000},
1307         {0x00000098, 0x10010000},
1308         {0x00000099, 0x00006000},
1309         {0x0000009a, 0x00001000},
1310         {0x0000009f, 0x00a77400}
1311 };
1312
1313 static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1314         {0x0000006f, 0x03044000},
1315         {0x00000070, 0x0480c018},
1316         {0x00000071, 0x00000040},
1317         {0x00000072, 0x01000000},
1318         {0x00000074, 0x000000ff},
1319         {0x00000075, 0x00143400},
1320         {0x00000076, 0x08ec0800},
1321         {0x00000077, 0x040000cc},
1322         {0x00000079, 0x00000000},
1323         {0x0000007a, 0x21000409},
1324         {0x0000007c, 0x00000000},
1325         {0x0000007d, 0xe8000000},
1326         {0x0000007e, 0x044408a8},
1327         {0x0000007f, 0x00000003},
1328         {0x00000080, 0x00000000},
1329         {0x00000081, 0x01000000},
1330         {0x00000082, 0x02000000},
1331         {0x00000083, 0x00000000},
1332         {0x00000084, 0xe3f3e4f4},
1333         {0x00000085, 0x00052024},
1334         {0x00000087, 0x00000000},
1335         {0x00000088, 0x66036603},
1336         {0x00000089, 0x01000000},
1337         {0x0000008b, 0x1c0a0000},
1338         {0x0000008c, 0xff010000},
1339         {0x0000008e, 0xffffefff},
1340         {0x0000008f, 0xfff3efff},
1341         {0x00000090, 0xfff3efbf},
1342         {0x00000094, 0x00101101},
1343         {0x00000095, 0x00000fff},
1344         {0x00000096, 0x00116fff},
1345         {0x00000097, 0x60010000},
1346         {0x00000098, 0x10010000},
1347         {0x00000099, 0x00006000},
1348         {0x0000009a, 0x00001000},
1349         {0x0000009f, 0x00a47400}
1350 };
1351
1352 static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1353         {0x0000006f, 0x03044000},
1354         {0x00000070, 0x0480c018},
1355         {0x00000071, 0x00000040},
1356         {0x00000072, 0x01000000},
1357         {0x00000074, 0x000000ff},
1358         {0x00000075, 0x00143400},
1359         {0x00000076, 0x08ec0800},
1360         {0x00000077, 0x040000cc},
1361         {0x00000079, 0x00000000},
1362         {0x0000007a, 0x21000409},
1363         {0x0000007c, 0x00000000},
1364         {0x0000007d, 0xe8000000},
1365         {0x0000007e, 0x044408a8},
1366         {0x0000007f, 0x00000003},
1367         {0x00000080, 0x00000000},
1368         {0x00000081, 0x01000000},
1369         {0x00000082, 0x02000000},
1370         {0x00000083, 0x00000000},
1371         {0x00000084, 0xe3f3e4f4},
1372         {0x00000085, 0x00052024},
1373         {0x00000087, 0x00000000},
1374         {0x00000088, 0x66036603},
1375         {0x00000089, 0x01000000},
1376         {0x0000008b, 0x1c0a0000},
1377         {0x0000008c, 0xff010000},
1378         {0x0000008e, 0xffffefff},
1379         {0x0000008f, 0xfff3efff},
1380         {0x00000090, 0xfff3efbf},
1381         {0x00000094, 0x00101101},
1382         {0x00000095, 0x00000fff},
1383         {0x00000096, 0x00116fff},
1384         {0x00000097, 0x60010000},
1385         {0x00000098, 0x10010000},
1386         {0x00000099, 0x00006000},
1387         {0x0000009a, 0x00001000},
1388         {0x0000009f, 0x00a37400}
1389 };
1390
1391 static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1392         {0x0000006f, 0x03044000},
1393         {0x00000070, 0x0480c018},
1394         {0x00000071, 0x00000040},
1395         {0x00000072, 0x01000000},
1396         {0x00000074, 0x000000ff},
1397         {0x00000075, 0x00143400},
1398         {0x00000076, 0x08ec0800},
1399         {0x00000077, 0x040000cc},
1400         {0x00000079, 0x00000000},
1401         {0x0000007a, 0x21000409},
1402         {0x0000007c, 0x00000000},
1403         {0x0000007d, 0xe8000000},
1404         {0x0000007e, 0x044408a8},
1405         {0x0000007f, 0x00000003},
1406         {0x00000080, 0x00000000},
1407         {0x00000081, 0x01000000},
1408         {0x00000082, 0x02000000},
1409         {0x00000083, 0x00000000},
1410         {0x00000084, 0xe3f3e4f4},
1411         {0x00000085, 0x00052024},
1412         {0x00000087, 0x00000000},
1413         {0x00000088, 0x66036603},
1414         {0x00000089, 0x01000000},
1415         {0x0000008b, 0x1c0a0000},
1416         {0x0000008c, 0xff010000},
1417         {0x0000008e, 0xffffefff},
1418         {0x0000008f, 0xfff3efff},
1419         {0x00000090, 0xfff3efbf},
1420         {0x00000094, 0x00101101},
1421         {0x00000095, 0x00000fff},
1422         {0x00000096, 0x00116fff},
1423         {0x00000097, 0x60010000},
1424         {0x00000098, 0x10010000},
1425         {0x00000099, 0x00006000},
1426         {0x0000009a, 0x00001000},
1427         {0x0000009f, 0x00a17730}
1428 };
1429
1430 static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1431         {0x0000006f, 0x03044000},
1432         {0x00000070, 0x0480c018},
1433         {0x00000071, 0x00000040},
1434         {0x00000072, 0x01000000},
1435         {0x00000074, 0x000000ff},
1436         {0x00000075, 0x00143400},
1437         {0x00000076, 0x08ec0800},
1438         {0x00000077, 0x040000cc},
1439         {0x00000079, 0x00000000},
1440         {0x0000007a, 0x21000409},
1441         {0x0000007c, 0x00000000},
1442         {0x0000007d, 0xe8000000},
1443         {0x0000007e, 0x044408a8},
1444         {0x0000007f, 0x00000003},
1445         {0x00000080, 0x00000000},
1446         {0x00000081, 0x01000000},
1447         {0x00000082, 0x02000000},
1448         {0x00000083, 0x00000000},
1449         {0x00000084, 0xe3f3e4f4},
1450         {0x00000085, 0x00052024},
1451         {0x00000087, 0x00000000},
1452         {0x00000088, 0x66036603},
1453         {0x00000089, 0x01000000},
1454         {0x0000008b, 0x1c0a0000},
1455         {0x0000008c, 0xff010000},
1456         {0x0000008e, 0xffffefff},
1457         {0x0000008f, 0xfff3efff},
1458         {0x00000090, 0xfff3efbf},
1459         {0x00000094, 0x00101101},
1460         {0x00000095, 0x00000fff},
1461         {0x00000096, 0x00116fff},
1462         {0x00000097, 0x60010000},
1463         {0x00000098, 0x10010000},
1464         {0x00000099, 0x00006000},
1465         {0x0000009a, 0x00001000},
1466         {0x0000009f, 0x00a07730}
1467 };
1468
1469 /* ucode loading */
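/**
 * si_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Select the per-family MC_SEQ_IO_DEBUG register table and, if the MC
 * sequencer is not already running, reset it, program the io regs,
 * stream the big-endian MC ucode words into MC_SEQ_SUP_PGM, restart the
 * engine and poll TRAIN_DONE_D0/D1 until memory training completes or
 * the usec timeout expires.
 * Returns 0 on success, -EINVAL if no MC firmware has been loaded.
 */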
1470 int si_mc_load_microcode(struct radeon_device *rdev)
1471 {
1472         const __be32 *fw_data;
1473         u32 running, blackout = 0;
1474         u32 *io_mc_regs;
1475         int i, regs_size, ucode_size;
1476
1477         if (!rdev->mc_fw)
1478                 return -EINVAL;
1479
1480         ucode_size = rdev->mc_fw->size / 4;
1481
1482         switch (rdev->family) {
1483         case CHIP_TAHITI:
1484                 io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1485                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1486                 break;
1487         case CHIP_PITCAIRN:
1488                 io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1489                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1490                 break;
1491         case CHIP_VERDE:
1492         default:
1493                 io_mc_regs = (u32 *)&verde_io_mc_regs;
1494                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1495                 break;
1496         case CHIP_OLAND:
1497                 io_mc_regs = (u32 *)&oland_io_mc_regs;
1498                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1499                 break;
1500         case CHIP_HAINAN:
1501                 io_mc_regs = (u32 *)&hainan_io_mc_regs;
1502                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1503                 break;
1504         }
1505
1506         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1507
1508         if (running == 0) {
1509                 if (running) {
1510                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1511                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1512                 }
1513
1514                 /* reset the engine and set to writable */
1515                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1516                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1517
1518                 /* load mc io regs */
1519                 for (i = 0; i < regs_size; i++) {
1520                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1521                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1522                 }
1523                 /* load the MC ucode */
1524                 fw_data = (const __be32 *)rdev->mc_fw->data;
1525                 for (i = 0; i < ucode_size; i++)
1526                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1527
1528                 /* put the engine back into the active state */
1529                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1530                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1531                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1532
1533                 /* wait for training to complete */
1534                 for (i = 0; i < rdev->usec_timeout; i++) {
1535                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1536                                 break;
1537                         udelay(1);
1538                 }
1539                 for (i = 0; i < rdev->usec_timeout; i++) {
1540                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1541                                 break;
1542                         udelay(1);
1543                 }
1544
1545                 if (running)
1546                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1547         }
1548
1549         return 0;
1550 }
1551
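/**
 * si_init_microcode - load the ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Request the PFP, ME, CE, RLC and MC firmware images for the asic
 * (trying the _mc2 image first and falling back to _mc) and verify
 * that each blob has the expected size.  The SMC image is optional:
 * if it cannot be loaded the reference is simply cleared.  On other
 * failures all firmware references are released.
 * Returns 0 on success, negative error code on failure.
 */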
1552 static int si_init_microcode(struct radeon_device *rdev)
1553 {
1554         const char *chip_name;
1555         const char *rlc_chip_name;
1556         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1557         size_t smc_req_size, mc2_req_size;
1558         char fw_name[30];
1559         int err;
1560
1561         DRM_DEBUG("\n");
1562
1563         switch (rdev->family) {
1564         case CHIP_TAHITI:
1565                 chip_name = "TAHITI";
1566                 rlc_chip_name = "TAHITI";
1567                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1568                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1569                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1570                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1571                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1572                 mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1573                 smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1574                 break;
1575         case CHIP_PITCAIRN:
1576                 chip_name = "PITCAIRN";
1577                 rlc_chip_name = "PITCAIRN";
1578                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1579                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1580                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1581                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1582                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1583                 mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1584                 smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1585                 break;
1586         case CHIP_VERDE:
1587                 chip_name = "VERDE";
1588                 rlc_chip_name = "VERDE";
1589                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1590                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1591                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1592                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1593                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1594                 mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1595                 smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1596                 break;
1597         case CHIP_OLAND:
1598                 chip_name = "OLAND";
1599                 rlc_chip_name = "OLAND";
1600                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1601                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1602                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1603                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1604                 mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1605                 smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1606                 break;
1607         case CHIP_HAINAN:
1608                 chip_name = "HAINAN";
1609                 rlc_chip_name = "HAINAN";
1610                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1611                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1612                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1613                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1614                 mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1615                 smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1616                 break;
1617         default: BUG();
1618         }
1619
1620         DRM_INFO("Loading %s Microcode\n", chip_name);
1621
1622         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1623         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1624         if (err)
1625                 goto out;
1626         if (rdev->pfp_fw->size != pfp_req_size) {
1627                 printk(KERN_ERR
1628                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1629                        rdev->pfp_fw->size, fw_name);
1630                 err = -EINVAL;
1631                 goto out;
1632         }
1633
1634         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1635         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1636         if (err)
1637                 goto out;
1638         if (rdev->me_fw->size != me_req_size) {
1639                 printk(KERN_ERR
1640                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1641                        rdev->me_fw->size, fw_name);
1642                 err = -EINVAL;
                     goto out;
1643         }
1644
1645         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1646         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1647         if (err)
1648                 goto out;
1649         if (rdev->ce_fw->size != ce_req_size) {
1650                 printk(KERN_ERR
1651                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1652                        rdev->ce_fw->size, fw_name);
1653                 err = -EINVAL;
                     goto out;
1654         }
1655
1656         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1657         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1658         if (err)
1659                 goto out;
1660         if (rdev->rlc_fw->size != rlc_req_size) {
1661                 printk(KERN_ERR
1662                        "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1663                        rdev->rlc_fw->size, fw_name);
1664                 err = -EINVAL;
                     goto out;
1665         }
1666
1667         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1668         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1669         if (err) {
1670                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1671                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1672                 if (err)
1673                         goto out;
1674         }
1675         if ((rdev->mc_fw->size != mc_req_size) &&
1676             (rdev->mc_fw->size != mc2_req_size)) {
1677                 printk(KERN_ERR
1678                        "si_mc: Bogus length %zu in firmware \"%s\"\n",
1679                        rdev->mc_fw->size, fw_name);
1680                 err = -EINVAL;
                     goto out;
1681         }
1682         DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1683
1684         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1685         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1686         if (err) {
1687                 printk(KERN_ERR
1688                        "smc: error loading firmware \"%s\"\n",
1689                        fw_name);
1690                 release_firmware(rdev->smc_fw);
1691                 rdev->smc_fw = NULL;
1692                 err = 0;
1693         } else if (rdev->smc_fw->size != smc_req_size) {
1694                 printk(KERN_ERR
1695                        "si_smc: Bogus length %zu in firmware \"%s\"\n",
1696                        rdev->smc_fw->size, fw_name);
1697                 err = -EINVAL;
1698         }
1699
1700 out:
1701         if (err) {
1702                 if (err != -EINVAL)
1703                         printk(KERN_ERR
1704                                "si_cp: Failed to load firmware \"%s\"\n",
1705                                fw_name);
1706                 release_firmware(rdev->pfp_fw);
1707                 rdev->pfp_fw = NULL;
1708                 release_firmware(rdev->me_fw);
1709                 rdev->me_fw = NULL;
1710                 release_firmware(rdev->ce_fw);
1711                 rdev->ce_fw = NULL;
1712                 release_firmware(rdev->rlc_fw);
1713                 rdev->rlc_fw = NULL;
1714                 release_firmware(rdev->mc_fw);
1715                 rdev->mc_fw = NULL;
1716                 release_firmware(rdev->smc_fw);
1717                 rdev->smc_fw = NULL;
1718         }
1719         return err;
1720 }
1721
1722 /* watermark setup */
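/**
 * dce6_line_buffer_adjust - set up the line buffer split for a crtc
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @mode: the current display mode on the selected display controller
 * @other_mode: the display mode of the paired display controller
 *
 * Program DC_LB_MEMORY_SPLIT for the crtc (half of the shared line
 * buffer if the paired crtc is also active, the whole buffer otherwise),
 * then trigger the DMIF buffer allocation and wait for it to complete.
 * Returns the line buffer size in pixels, or 0 if the crtc is disabled.
 */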
1723 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1724                                    struct radeon_crtc *radeon_crtc,
1725                                    struct drm_display_mode *mode,
1726                                    struct drm_display_mode *other_mode)
1727 {
1728         u32 tmp, buffer_alloc, i;
1729         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1730         /*
1731          * Line Buffer Setup
1732          * There are 3 line buffers, each one shared by 2 display controllers.
1733          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1734          * the display controllers.  The partitioning is done via one of four
1735          * preset allocations specified in bits 21:20:
1736          *  0 - half lb
1737          *  2 - whole lb, other crtc must be disabled
1738          */
1739         /* this can get tricky if we have two large displays on a paired group
1740          * of crtcs.  Ideally for multiple large displays we'd assign them to
1741          * non-linked crtcs for maximum line buffer allocation.
1742          */
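        /* For illustration: when both crtcs of a pair drive a mode, each
         * gets half of the shared line buffer (4096 * 2 pixels below);
         * when only this crtc is active it gets the whole buffer (8192 * 2).
         */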
1743         if (radeon_crtc->base.enabled && mode) {
1744                 if (other_mode) {
1745                         tmp = 0; /* 1/2 */
1746                         buffer_alloc = 1;
1747                 } else {
1748                         tmp = 2; /* whole */
1749                         buffer_alloc = 2;
1750                 }
1751         } else {
1752                 tmp = 0;
1753                 buffer_alloc = 0;
1754         }
1755
1756         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1757                DC_LB_MEMORY_CONFIG(tmp));
1758
1759         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1760                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1761         for (i = 0; i < rdev->usec_timeout; i++) {
1762                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1763                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
1764                         break;
1765                 udelay(1);
1766         }
1767
1768         if (radeon_crtc->base.enabled && mode) {
1769                 switch (tmp) {
1770                 case 0:
1771                 default:
1772                         return 4096 * 2;
1773                 case 2:
1774                         return 8192 * 2;
1775                 }
1776         }
1777
1778         /* controller not enabled, so no lb used */
1779         return 0;
1780 }
1781
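/**
 * si_get_number_of_dram_channels - get the number of DRAM channels
 *
 * @rdev: radeon_device pointer
 *
 * Decode the NOOFCHAN field of MC_SHARED_CHMAP into the channel count
 * used by the watermark calculations below.
 * Returns the number of DRAM channels (1-16).
 */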
1782 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1783 {
1784         u32 tmp = RREG32(MC_SHARED_CHMAP);
1785
1786         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1787         case 0:
1788         default:
1789                 return 1;
1790         case 1:
1791                 return 2;
1792         case 2:
1793                 return 4;
1794         case 3:
1795                 return 8;
1796         case 4:
1797                 return 3;
1798         case 5:
1799                 return 6;
1800         case 6:
1801                 return 10;
1802         case 7:
1803                 return 12;
1804         case 8:
1805                 return 16;
1806         }
1807 }
1808
1809 struct dce6_wm_params {
1810         u32 dram_channels; /* number of dram channels */
1811         u32 yclk;          /* bandwidth per dram data pin in kHz */
1812         u32 sclk;          /* engine clock in kHz */
1813         u32 disp_clk;      /* display clock in kHz */
1814         u32 src_width;     /* viewport width */
1815         u32 active_time;   /* active display time in ns */
1816         u32 blank_time;    /* blank time in ns */
1817         bool interlaced;    /* mode is interlaced */
1818         fixed20_12 vsc;    /* vertical scale ratio */
1819         u32 num_heads;     /* number of active crtcs */
1820         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
1821         u32 lb_size;       /* line buffer allocated to pipe */
1822         u32 vtaps;         /* vertical scaler taps */
1823 };
1824
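/**
 * dce6_dram_bandwidth - get the raw DRAM bandwidth
 *
 * @wm: watermark calculation data
 *
 * Raw bandwidth = yclk (kHz) / 1000 * dram_channels * 4 bytes * 0.7
 * efficiency, evaluated in 20.12 fixed point.  For illustration, a two
 * channel part with yclk = 1000000 kHz works out to 1000 * 8 * 0.7 = 5600.
 * Returns the truncated result (roughly MB/s).
 */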
1825 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1826 {
1827         /* Calculate raw DRAM Bandwidth */
1828         fixed20_12 dram_efficiency; /* 0.7 */
1829         fixed20_12 yclk, dram_channels, bandwidth;
1830         fixed20_12 a;
1831
1832         a.full = dfixed_const(1000);
1833         yclk.full = dfixed_const(wm->yclk);
1834         yclk.full = dfixed_div(yclk, a);
1835         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1836         a.full = dfixed_const(10);
1837         dram_efficiency.full = dfixed_const(7);
1838         dram_efficiency.full = dfixed_div(dram_efficiency, a);
1839         bandwidth.full = dfixed_mul(dram_channels, yclk);
1840         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1841
1842         return dfixed_trunc(bandwidth);
1843 }
1844
1845 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1846 {
1847         /* Calculate DRAM Bandwidth and the part allocated to display. */
1848         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1849         fixed20_12 yclk, dram_channels, bandwidth;
1850         fixed20_12 a;
1851
1852         a.full = dfixed_const(1000);
1853         yclk.full = dfixed_const(wm->yclk);
1854         yclk.full = dfixed_div(yclk, a);
1855         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1856         a.full = dfixed_const(10);
1857         disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
1858         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1859         bandwidth.full = dfixed_mul(dram_channels, yclk);
1860         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1861
1862         return dfixed_trunc(bandwidth);
1863 }
1864
1865 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1866 {
1867         /* Calculate the display Data return Bandwidth */
1868         fixed20_12 return_efficiency; /* 0.8 */
1869         fixed20_12 sclk, bandwidth;
1870         fixed20_12 a;
1871
1872         a.full = dfixed_const(1000);
1873         sclk.full = dfixed_const(wm->sclk);
1874         sclk.full = dfixed_div(sclk, a);
1875         a.full = dfixed_const(10);
1876         return_efficiency.full = dfixed_const(8);
1877         return_efficiency.full = dfixed_div(return_efficiency, a);
1878         a.full = dfixed_const(32);
1879         bandwidth.full = dfixed_mul(a, sclk);
1880         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1881
1882         return dfixed_trunc(bandwidth);
1883 }
1884
1885 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
1886 {
1887         return 32;
1888 }
1889
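/**
 * dce6_dmif_request_bandwidth - get the DMIF request bandwidth
 *
 * @wm: watermark calculation data
 *
 * Take the smaller of disp_clk * bytes_per_request / 2 and
 * sclk * bytes_per_request (both clocks in MHz) and scale it by the
 * 0.8 request efficiency factor.
 * Returns the DMIF request bandwidth (roughly MB/s).
 */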
1890 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1891 {
1892         /* Calculate the DMIF Request Bandwidth */
1893         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1894         fixed20_12 disp_clk, sclk, bandwidth;
1895         fixed20_12 a, b1, b2;
1896         u32 min_bandwidth;
1897
1898         a.full = dfixed_const(1000);
1899         disp_clk.full = dfixed_const(wm->disp_clk);
1900         disp_clk.full = dfixed_div(disp_clk, a);
1901         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1902         b1.full = dfixed_mul(a, disp_clk);
1903
1904         a.full = dfixed_const(1000);
1905         sclk.full = dfixed_const(wm->sclk);
1906         sclk.full = dfixed_div(sclk, a);
1907         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1908         b2.full = dfixed_mul(a, sclk);
1909
1910         a.full = dfixed_const(10);
1911         disp_clk_request_efficiency.full = dfixed_const(8);
1912         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1913
1914         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1915
1916         a.full = dfixed_const(min_bandwidth);
1917         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1918
1919         return dfixed_trunc(bandwidth);
1920 }
1921
1922 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1923 {
1924         /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
1925         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1926         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1927         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1928
1929         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1930 }
1931
1932 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1933 {
1934         /* Calculate the display mode Average Bandwidth
1935          * DisplayMode should contain the source and destination dimensions,
1936          * timing, etc.
1937          */
1938         fixed20_12 bpp;
1939         fixed20_12 line_time;
1940         fixed20_12 src_width;
1941         fixed20_12 bandwidth;
1942         fixed20_12 a;
1943
1944         a.full = dfixed_const(1000);
1945         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1946         line_time.full = dfixed_div(line_time, a);
1947         bpp.full = dfixed_const(wm->bytes_per_pixel);
1948         src_width.full = dfixed_const(wm->src_width);
1949         bandwidth.full = dfixed_mul(src_width, bpp);
1950         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1951         bandwidth.full = dfixed_div(bandwidth, line_time);
1952
1953         return dfixed_trunc(bandwidth);
1954 }
1955
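/**
 * dce6_latency_watermark - get the latency watermark
 *
 * @wm: watermark calculation data
 *
 * Estimate the worst case latency (in ns) the display must tolerate:
 * fixed MC latency plus DC pipe latency plus the time the other heads
 * may spend returning chunk and cursor data.  If the line buffer cannot
 * be refilled within the active display time, the shortfall is added.
 * Returns the latency watermark in ns (0 if there are no active heads).
 */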
1956 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
1957 {
1958         /* First calculate the latency in ns */
1959         u32 mc_latency = 2000; /* 2000 ns. */
1960         u32 available_bandwidth = dce6_available_bandwidth(wm);
1961         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
1962         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
1963         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
1964         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
1965                 (wm->num_heads * cursor_line_pair_return_time);
1966         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
1967         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
1968         u32 tmp, dmif_size = 12288;
1969         fixed20_12 a, b, c;
1970
1971         if (wm->num_heads == 0)
1972                 return 0;
1973
1974         a.full = dfixed_const(2);
1975         b.full = dfixed_const(1);
1976         if ((wm->vsc.full > a.full) ||
1977             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
1978             (wm->vtaps >= 5) ||
1979             ((wm->vsc.full >= a.full) && wm->interlaced))
1980                 max_src_lines_per_dst_line = 4;
1981         else
1982                 max_src_lines_per_dst_line = 2;
1983
1984         a.full = dfixed_const(available_bandwidth);
1985         b.full = dfixed_const(wm->num_heads);
1986         a.full = dfixed_div(a, b);
1987
1988         b.full = dfixed_const(mc_latency + 512);
1989         c.full = dfixed_const(wm->disp_clk);
1990         b.full = dfixed_div(b, c);
1991
1992         c.full = dfixed_const(dmif_size);
1993         b.full = dfixed_div(c, b);
1994
1995         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
1996
1997         b.full = dfixed_const(1000);
1998         c.full = dfixed_const(wm->disp_clk);
1999         b.full = dfixed_div(c, b);
2000         c.full = dfixed_const(wm->bytes_per_pixel);
2001         b.full = dfixed_mul(b, c);
2002
2003         lb_fill_bw = min(tmp, dfixed_trunc(b));
2004
2005         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2006         b.full = dfixed_const(1000);
2007         c.full = dfixed_const(lb_fill_bw);
2008         b.full = dfixed_div(c, b);
2009         a.full = dfixed_div(a, b);
2010         line_fill_time = dfixed_trunc(a);
2011
2012         if (line_fill_time < wm->active_time)
2013                 return latency;
2014         else
2015                 return latency + (line_fill_time - wm->active_time);
2016
2017 }
2018
2019 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2020 {
2021         if (dce6_average_bandwidth(wm) <=
2022             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2023                 return true;
2024         else
2025                 return false;
2026 }
2027
2028 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2029 {
2030         if (dce6_average_bandwidth(wm) <=
2031             (dce6_available_bandwidth(wm) / wm->num_heads))
2032                 return true;
2033         else
2034                 return false;
2035 }
2036
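/**
 * dce6_check_latency_hiding - check latency hiding
 *
 * @wm: watermark calculation data
 *
 * Check whether the latency watermark fits within the time the line
 * buffer can cover (one or two latency tolerant lines plus the blank
 * time, depending on scaling and the line buffer partitioning).
 * Returns true if the latency can be hidden, false otherwise.
 */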
2037 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2038 {
2039         u32 lb_partitions = wm->lb_size / wm->src_width;
2040         u32 line_time = wm->active_time + wm->blank_time;
2041         u32 latency_tolerant_lines;
2042         u32 latency_hiding;
2043         fixed20_12 a;
2044
2045         a.full = dfixed_const(1);
2046         if (wm->vsc.full > a.full)
2047                 latency_tolerant_lines = 1;
2048         else {
2049                 if (lb_partitions <= (wm->vtaps + 1))
2050                         latency_tolerant_lines = 1;
2051                 else
2052                         latency_tolerant_lines = 2;
2053         }
2054
2055         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2056
2057         if (dce6_latency_watermark(wm) <= latency_hiding)
2058                 return true;
2059         else
2060                 return false;
2061 }
2062
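/**
 * dce6_program_watermarks - program display watermarks
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @lb_size: line buffer size allocated to this crtc
 * @num_heads: number of active display heads
 *
 * Build high and low clock watermark parameters from the current mode
 * and the DPM (or current) mclk/sclk, compute latency watermarks A and
 * B, force the display priority high when the bandwidth or latency
 * hiding checks fail, then program the arbitration, latency and
 * priority registers and save the results for DPM.
 */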
2063 static void dce6_program_watermarks(struct radeon_device *rdev,
2064                                          struct radeon_crtc *radeon_crtc,
2065                                          u32 lb_size, u32 num_heads)
2066 {
2067         struct drm_display_mode *mode = &radeon_crtc->base.mode;
2068         struct dce6_wm_params wm_low, wm_high;
2069         u32 dram_channels;
2070         u32 pixel_period;
2071         u32 line_time = 0;
2072         u32 latency_watermark_a = 0, latency_watermark_b = 0;
2073         u32 priority_a_mark = 0, priority_b_mark = 0;
2074         u32 priority_a_cnt = PRIORITY_OFF;
2075         u32 priority_b_cnt = PRIORITY_OFF;
2076         u32 tmp, arb_control3;
2077         fixed20_12 a, b, c;
2078
2079         if (radeon_crtc->base.enabled && num_heads && mode) {
2080                 pixel_period = 1000000 / (u32)mode->clock;
2081                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
2082                 priority_a_cnt = 0;
2083                 priority_b_cnt = 0;
2084
2085                 if (rdev->family == CHIP_ARUBA)
2086                         dram_channels = evergreen_get_number_of_dram_channels(rdev);
2087                 else
2088                         dram_channels = si_get_number_of_dram_channels(rdev);
2089
2090                 /* watermark for high clocks */
2091                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2092                         wm_high.yclk =
2093                                 radeon_dpm_get_mclk(rdev, false) * 10;
2094                         wm_high.sclk =
2095                                 radeon_dpm_get_sclk(rdev, false) * 10;
2096                 } else {
2097                         wm_high.yclk = rdev->pm.current_mclk * 10;
2098                         wm_high.sclk = rdev->pm.current_sclk * 10;
2099                 }
2100
2101                 wm_high.disp_clk = mode->clock;
2102                 wm_high.src_width = mode->crtc_hdisplay;
2103                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
2104                 wm_high.blank_time = line_time - wm_high.active_time;
2105                 wm_high.interlaced = false;
2106                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2107                         wm_high.interlaced = true;
2108                 wm_high.vsc = radeon_crtc->vsc;
2109                 wm_high.vtaps = 1;
2110                 if (radeon_crtc->rmx_type != RMX_OFF)
2111                         wm_high.vtaps = 2;
2112                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2113                 wm_high.lb_size = lb_size;
2114                 wm_high.dram_channels = dram_channels;
2115                 wm_high.num_heads = num_heads;
2116
2117                 /* watermark for low clocks */
2118                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2119                         wm_low.yclk =
2120                                 radeon_dpm_get_mclk(rdev, true) * 10;
2121                         wm_low.sclk =
2122                                 radeon_dpm_get_sclk(rdev, true) * 10;
2123                 } else {
2124                         wm_low.yclk = rdev->pm.current_mclk * 10;
2125                         wm_low.sclk = rdev->pm.current_sclk * 10;
2126                 }
2127
2128                 wm_low.disp_clk = mode->clock;
2129                 wm_low.src_width = mode->crtc_hdisplay;
2130                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
2131                 wm_low.blank_time = line_time - wm_low.active_time;
2132                 wm_low.interlaced = false;
2133                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2134                         wm_low.interlaced = true;
2135                 wm_low.vsc = radeon_crtc->vsc;
2136                 wm_low.vtaps = 1;
2137                 if (radeon_crtc->rmx_type != RMX_OFF)
2138                         wm_low.vtaps = 2;
2139                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2140                 wm_low.lb_size = lb_size;
2141                 wm_low.dram_channels = dram_channels;
2142                 wm_low.num_heads = num_heads;
2143
2144                 /* set for high clocks */
2145                 latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2146                 /* set for low clocks */
2147                 latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2148
2149                 /* possibly force display priority to high */
2150                 /* should really do this at mode validation time... */
2151                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2152                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2153                     !dce6_check_latency_hiding(&wm_high) ||
2154                     (rdev->disp_priority == 2)) {
2155                         DRM_DEBUG_KMS("force priority to high\n");
2156                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2157                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2158                 }
2159                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2160                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2161                     !dce6_check_latency_hiding(&wm_low) ||
2162                     (rdev->disp_priority == 2)) {
2163                         DRM_DEBUG_KMS("force priority to high\n");
2164                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2165                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2166                 }
2167
2168                 a.full = dfixed_const(1000);
2169                 b.full = dfixed_const(mode->clock);
2170                 b.full = dfixed_div(b, a);
2171                 c.full = dfixed_const(latency_watermark_a);
2172                 c.full = dfixed_mul(c, b);
2173                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2174                 c.full = dfixed_div(c, a);
2175                 a.full = dfixed_const(16);
2176                 c.full = dfixed_div(c, a);
2177                 priority_a_mark = dfixed_trunc(c);
2178                 priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2179
2180                 a.full = dfixed_const(1000);
2181                 b.full = dfixed_const(mode->clock);
2182                 b.full = dfixed_div(b, a);
2183                 c.full = dfixed_const(latency_watermark_b);
2184                 c.full = dfixed_mul(c, b);
2185                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2186                 c.full = dfixed_div(c, a);
2187                 a.full = dfixed_const(16);
2188                 c.full = dfixed_div(c, a);
2189                 priority_b_mark = dfixed_trunc(c);
2190                 priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2191         }
2192
2193         /* select wm A */
2194         arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2195         tmp = arb_control3;
2196         tmp &= ~LATENCY_WATERMARK_MASK(3);
2197         tmp |= LATENCY_WATERMARK_MASK(1);
2198         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2199         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2200                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2201                 LATENCY_HIGH_WATERMARK(line_time)));
2202         /* select wm B */
2203         tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2204         tmp &= ~LATENCY_WATERMARK_MASK(3);
2205         tmp |= LATENCY_WATERMARK_MASK(2);
2206         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2207         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2208                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2209                 LATENCY_HIGH_WATERMARK(line_time)));
2210         /* restore original selection */
2211         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2212
2213         /* write the priority marks */
2214         WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2215         WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2216
2217         /* save values for DPM */
2218         radeon_crtc->line_time = line_time;
2219         radeon_crtc->wm_high = latency_watermark_a;
2220         radeon_crtc->wm_low = latency_watermark_b;
2221 }
2222
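/**
 * dce6_bandwidth_update - update the display watermarks
 *
 * @rdev: radeon_device pointer
 *
 * Update the display watermarks based on the current mode(s): count the
 * enabled crtcs and, for each crtc pair sharing a line buffer, redo the
 * line buffer split and reprogram the watermarks.
 */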
2223 void dce6_bandwidth_update(struct radeon_device *rdev)
2224 {
2225         struct drm_display_mode *mode0 = NULL;
2226         struct drm_display_mode *mode1 = NULL;
2227         u32 num_heads = 0, lb_size;
2228         int i;
2229
2230         radeon_update_display_priority(rdev);
2231
2232         for (i = 0; i < rdev->num_crtc; i++) {
2233                 if (rdev->mode_info.crtcs[i]->base.enabled)
2234                         num_heads++;
2235         }
2236         for (i = 0; i < rdev->num_crtc; i += 2) {
2237                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2238                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2239                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2240                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2241                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2242                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2243         }
2244 }
2245
2246 /*
2247  * Core functions
2248  */
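/**
 * si_tiling_mode_table_init - program the tiling mode table
 *
 * @rdev: radeon_device pointer
 *
 * Select the tile split that matches the memory row size, then program
 * all 32 GB_TILE_MODE registers with the per-family pipe configuration
 * (P8_32x32_8x16 on Tahiti/Pitcairn, P4_8x16 on Verde/Oland/Hainan) and
 * cache each value in rdev->config.si.tile_mode_array.
 */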
2249 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2250 {
2251         const u32 num_tile_mode_states = 32;
2252         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2253
2254         switch (rdev->config.si.mem_row_size_in_kb) {
2255         case 1:
2256                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2257                 break;
2258         case 2:
2259         default:
2260                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2261                 break;
2262         case 4:
2263                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2264                 break;
2265         }
2266
2267         if ((rdev->family == CHIP_TAHITI) ||
2268             (rdev->family == CHIP_PITCAIRN)) {
2269                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2270                         switch (reg_offset) {
2271                         case 0:  /* non-AA compressed depth or any compressed stencil */
2272                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2273                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2274                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2275                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2276                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2277                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2278                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2279                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2280                                 break;
2281                         case 1:  /* 2xAA/4xAA compressed depth only */
2282                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2283                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2284                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2285                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2286                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2287                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2288                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2289                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2290                                 break;
2291                         case 2:  /* 8xAA compressed depth only */
2292                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2293                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2294                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2295                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2296                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2297                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2298                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2299                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2300                                 break;
2301                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2302                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2303                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2304                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2305                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2306                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2307                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2308                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2309                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2310                                 break;
2311                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2312                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2313                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2314                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2315                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2316                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2317                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2318                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2319                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2320                                 break;
2321                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2322                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2323                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2324                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2325                                                  TILE_SPLIT(split_equal_to_row_size) |
2326                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2327                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2328                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2329                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2330                                 break;
2331                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2332                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2333                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2334                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2335                                                  TILE_SPLIT(split_equal_to_row_size) |
2336                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2337                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2338                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2339                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2340                                 break;
2341                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2342                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2343                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2344                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2345                                                  TILE_SPLIT(split_equal_to_row_size) |
2346                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2347                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2348                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2349                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2350                                 break;
2351                         case 8:  /* 1D and 1D Array Surfaces */
2352                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2353                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2354                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2355                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2356                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2357                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2358                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2359                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2360                                 break;
2361                         case 9:  /* Displayable maps. */
2362                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2363                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2364                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2365                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2366                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2367                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2369                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2370                                 break;
2371                         case 10:  /* Display 8bpp. */
2372                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2373                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2374                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2375                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2376                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2377                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2378                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2379                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2380                                 break;
2381                         case 11:  /* Display 16bpp. */
2382                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2383                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2384                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2385                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2386                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2387                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2388                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2389                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2390                                 break;
2391                         case 12:  /* Display 32bpp. */
2392                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2393                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2394                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2395                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2396                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2397                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2398                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2399                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2400                                 break;
2401                         case 13:  /* Thin. */
2402                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2403                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2404                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2405                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2406                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2407                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2408                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2409                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2410                                 break;
2411                         case 14:  /* Thin 8 bpp. */
2412                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2413                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2414                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2415                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2416                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2417                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2418                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2419                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2420                                 break;
2421                         case 15:  /* Thin 16 bpp. */
2422                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2423                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2424                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2425                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2426                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2427                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2428                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2429                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2430                                 break;
2431                         case 16:  /* Thin 32 bpp. */
2432                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2434                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2435                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2436                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2437                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2439                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2440                                 break;
2441                         case 17:  /* Thin 64 bpp. */
2442                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2443                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2444                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2445                                                  TILE_SPLIT(split_equal_to_row_size) |
2446                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2447                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2448                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2449                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2450                                 break;
2451                         case 21:  /* 8 bpp PRT. */
2452                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2453                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2454                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2455                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2456                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2457                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2458                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2459                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2460                                 break;
2461                         case 22:  /* 16 bpp PRT */
2462                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2463                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2464                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2465                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2466                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2467                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2468                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2469                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2470                                 break;
2471                         case 23:  /* 32 bpp PRT */
2472                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2473                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2474                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2475                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2476                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2477                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2479                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2480                                 break;
2481                         case 24:  /* 64 bpp PRT */
2482                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2483                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2484                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2485                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2486                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2487                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2488                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2489                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2490                                 break;
2491                         case 25:  /* 128 bpp PRT */
2492                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2493                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2494                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2495                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2496                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2497                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2500                                 break;
2501                         default:
2502                                 gb_tile_moden = 0;
2503                                 break;
2504                         }
2505                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2506                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2507                 }
2508         } else if ((rdev->family == CHIP_VERDE) ||
2509                    (rdev->family == CHIP_OLAND) ||
2510                    (rdev->family == CHIP_HAINAN)) {
2511                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2512                         switch (reg_offset) {
2513                         case 0:  /* non-AA compressed depth or any compressed stencil */
2514                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2515                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2516                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2517                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2518                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2519                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2520                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2521                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2522                                 break;
2523                         case 1:  /* 2xAA/4xAA compressed depth only */
2524                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2526                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2527                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2528                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2529                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2530                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2531                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2532                                 break;
2533                         case 2:  /* 8xAA compressed depth only */
2534                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2535                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2536                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2537                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2538                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2539                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2541                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2542                                 break;
2543                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2544                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2545                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2546                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2547                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2548                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2549                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2550                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2551                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2552                                 break;
2553                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2554                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2555                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2556                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2557                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2558                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2559                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2560                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2561                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2562                                 break;
2563                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2564                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2566                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2567                                                  TILE_SPLIT(split_equal_to_row_size) |
2568                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2569                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2571                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2572                                 break;
2573                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2574                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2575                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2576                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2577                                                  TILE_SPLIT(split_equal_to_row_size) |
2578                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2579                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2580                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2581                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2582                                 break;
2583                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2584                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2585                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2586                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2587                                                  TILE_SPLIT(split_equal_to_row_size) |
2588                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2589                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2590                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2591                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2592                                 break;
2593                         case 8:  /* 1D and 1D Array Surfaces */
2594                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2595                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2596                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2597                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2598                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2599                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2600                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2601                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2602                                 break;
2603                         case 9:  /* Displayable maps. */
2604                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2605                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2606                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2607                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2608                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2609                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2610                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2611                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2612                                 break;
2613                         case 10:  /* Display 8bpp. */
2614                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2615                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2616                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2617                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2618                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2619                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2621                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2622                                 break;
2623                         case 11:  /* Display 16bpp. */
2624                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2625                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2626                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2627                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2628                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2629                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2630                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2631                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2632                                 break;
2633                         case 12:  /* Display 32bpp. */
2634                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2635                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2636                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2637                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2638                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2639                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2641                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2642                                 break;
2643                         case 13:  /* Thin. */
2644                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2645                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2646                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2647                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2648                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2649                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2650                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2651                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2652                                 break;
2653                         case 14:  /* Thin 8 bpp. */
2654                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2655                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2656                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2657                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2658                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2659                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2660                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2661                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2662                                 break;
2663                         case 15:  /* Thin 16 bpp. */
2664                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2665                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2666                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2667                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2668                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2669                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2670                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2671                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2672                                 break;
2673                         case 16:  /* Thin 32 bpp. */
2674                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2675                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2676                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2677                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2678                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2679                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2680                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2681                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2682                                 break;
2683                         case 17:  /* Thin 64 bpp. */
2684                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2685                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2686                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2687                                                  TILE_SPLIT(split_equal_to_row_size) |
2688                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2689                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2690                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2691                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2692                                 break;
2693                         case 21:  /* 8 bpp PRT. */
2694                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2695                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2696                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2697                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2698                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2699                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2700                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2701                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2702                                 break;
2703                         case 22:  /* 16 bpp PRT */
2704                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2705                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2706                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2707                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2708                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2709                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2710                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2711                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2712                                 break;
2713                         case 23:  /* 32 bpp PRT */
2714                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2715                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2716                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2717                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2718                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2719                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2720                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2721                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2722                                 break;
2723                         case 24:  /* 64 bpp PRT */
2724                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2725                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2726                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2727                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2728                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2729                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2730                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2731                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2732                                 break;
2733                         case 25:  /* 128 bpp PRT */
2734                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2735                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2736                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2737                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2738                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2739                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2740                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2741                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2742                                 break;
2743                         default:
2744                                 gb_tile_moden = 0;
2745                                 break;
2746                         }
2747                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2748                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2749                 }
2750         } else {
2751                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
             }
2752 }
2753
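/*
 * Select which shader engine (SE) and shader array (SH) subsequent
 * GRBM-indexed register accesses target; 0xffffffff broadcasts to all.
 */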
2754 static void si_select_se_sh(struct radeon_device *rdev,
2755                             u32 se_num, u32 sh_num)
2756 {
2757         u32 data = INSTANCE_BROADCAST_WRITES;
2758
2759         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2760                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2761         else if (se_num == 0xffffffff)
2762                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2763         else if (sh_num == 0xffffffff)
2764                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2765         else
2766                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2767         WREG32(GRBM_GFX_INDEX, data);
2768 }
2769
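/*
 * Build a mask with the low bit_width bits set, e.g.
 * si_create_bitmask(4) == 0xf (equivalent to (1U << bit_width) - 1
 * for bit_width < 32).
 */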
2770 static u32 si_create_bitmask(u32 bit_width)
2771 {
2772         u32 i, mask = 0;
2773
2774         for (i = 0; i < bit_width; i++) {
2775                 mask <<= 1;
2776                 mask |= 1;
2777         }
2778         return mask;
2779 }
2780
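/*
 * Return a bitmask of the compute units that are enabled (i.e. not
 * harvested or user-disabled) in the currently selected shader array,
 * limited to cu_per_sh bits.
 */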
2781 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2782 {
2783         u32 data, mask;
2784
2785         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2786         if (data & 1)
2787                 data &= INACTIVE_CUS_MASK;
2788         else
2789                 data = 0;
2790         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2791
2792         data >>= INACTIVE_CUS_SHIFT;
2793
2794         mask = si_create_bitmask(cu_per_sh);
2795
2796         return ~data & mask;
2797 }
2798
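/*
 * For each SE/SH pair, find the first active CU and clear its bit in
 * SPI_STATIC_THREAD_MGMT_3, then restore broadcast mode.
 */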
2799 static void si_setup_spi(struct radeon_device *rdev,
2800                          u32 se_num, u32 sh_per_se,
2801                          u32 cu_per_sh)
2802 {
2803         int i, j, k;
2804         u32 data, mask, active_cu;
2805
2806         for (i = 0; i < se_num; i++) {
2807                 for (j = 0; j < sh_per_se; j++) {
2808                         si_select_se_sh(rdev, i, j);
2809                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2810                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2811
2812                                 /* walk the 16 possible CU bits */
2813                                 for (k = 0; k < 16; k++) {
2814                                         mask = 1 << k;
2815                                 if (active_cu & mask) {
2816                                         data &= ~mask;
2817                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2818                                         break;
2819                                 }
2820                         }
2821                 }
2822         }
2823         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2824 }
2825
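/*
 * Return a bitmask of the render backends (RBs) that are disabled
 * (harvested or user-disabled) in the currently selected shader array.
 */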
2826 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2827                               u32 max_rb_num_per_se,
2828                               u32 sh_per_se)
2829 {
2830         u32 data, mask;
2831
2832         data = RREG32(CC_RB_BACKEND_DISABLE);
2833         if (data & 1)
2834                 data &= BACKEND_DISABLE_MASK;
2835         else
2836                 data = 0;
2837         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2838
2839         data >>= BACKEND_DISABLE_SHIFT;
2840
2841         mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
2842
2843         return data & mask;
2844 }
2845
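/*
 * Collect the per-SE/SH disabled-RB bits, record the enabled RBs in
 * rdev->config.si.backend_enable_mask and program PA_SC_RASTER_CONFIG
 * for each shader engine accordingly.
 */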
2846 static void si_setup_rb(struct radeon_device *rdev,
2847                         u32 se_num, u32 sh_per_se,
2848                         u32 max_rb_num_per_se)
2849 {
2850         int i, j;
2851         u32 data, mask;
2852         u32 disabled_rbs = 0;
2853         u32 enabled_rbs = 0;
2854
2855         for (i = 0; i < se_num; i++) {
2856                 for (j = 0; j < sh_per_se; j++) {
2857                         si_select_se_sh(rdev, i, j);
2858                         data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
2859                         disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
2860                 }
2861         }
2862         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2863
2864         mask = 1;
2865         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
2866                 if (!(disabled_rbs & mask))
2867                         enabled_rbs |= mask;
2868                 mask <<= 1;
2869         }
2870
2871         rdev->config.si.backend_enable_mask = enabled_rbs;
2872
2873         for (i = 0; i < se_num; i++) {
2874                 si_select_se_sh(rdev, i, 0xffffffff);
2875                 data = 0;
2876                 for (j = 0; j < sh_per_se; j++) {
2877                         switch (enabled_rbs & 3) {
2878                         case 1:
2879                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2880                                 break;
2881                         case 2:
2882                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2883                                 break;
2884                         case 3:
2885                         default:
2886                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2887                                 break;
2888                         }
2889                         enabled_rbs >>= 2;
2890                 }
2891                 WREG32(PA_SC_RASTER_CONFIG, data);
2892         }
2893         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2894 }
2895
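/*
 * One-time GFX block setup: per-ASIC limits, HDP init, tiling/address
 * configuration, RB/SPI setup and 3D engine defaults.
 */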
2896 static void si_gpu_init(struct radeon_device *rdev)
2897 {
2898         u32 gb_addr_config = 0;
2899         u32 mc_shared_chmap, mc_arb_ramcfg;
2900         u32 sx_debug_1;
2901         u32 hdp_host_path_cntl;
2902         u32 tmp;
2903         int i, j;
2904
2905         switch (rdev->family) {
2906         case CHIP_TAHITI:
2907                 rdev->config.si.max_shader_engines = 2;
2908                 rdev->config.si.max_tile_pipes = 12;
2909                 rdev->config.si.max_cu_per_sh = 8;
2910                 rdev->config.si.max_sh_per_se = 2;
2911                 rdev->config.si.max_backends_per_se = 4;
2912                 rdev->config.si.max_texture_channel_caches = 12;
2913                 rdev->config.si.max_gprs = 256;
2914                 rdev->config.si.max_gs_threads = 32;
2915                 rdev->config.si.max_hw_contexts = 8;
2916
2917                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2918                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2919                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2920                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2921                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2922                 break;
2923         case CHIP_PITCAIRN:
2924                 rdev->config.si.max_shader_engines = 2;
2925                 rdev->config.si.max_tile_pipes = 8;
2926                 rdev->config.si.max_cu_per_sh = 5;
2927                 rdev->config.si.max_sh_per_se = 2;
2928                 rdev->config.si.max_backends_per_se = 4;
2929                 rdev->config.si.max_texture_channel_caches = 8;
2930                 rdev->config.si.max_gprs = 256;
2931                 rdev->config.si.max_gs_threads = 32;
2932                 rdev->config.si.max_hw_contexts = 8;
2933
2934                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2935                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2936                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2937                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2938                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2939                 break;
2940         case CHIP_VERDE:
2941         default:
2942                 rdev->config.si.max_shader_engines = 1;
2943                 rdev->config.si.max_tile_pipes = 4;
2944                 rdev->config.si.max_cu_per_sh = 5;
2945                 rdev->config.si.max_sh_per_se = 2;
2946                 rdev->config.si.max_backends_per_se = 4;
2947                 rdev->config.si.max_texture_channel_caches = 4;
2948                 rdev->config.si.max_gprs = 256;
2949                 rdev->config.si.max_gs_threads = 32;
2950                 rdev->config.si.max_hw_contexts = 8;
2951
2952                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2953                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2954                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2955                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2956                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2957                 break;
2958         case CHIP_OLAND:
2959                 rdev->config.si.max_shader_engines = 1;
2960                 rdev->config.si.max_tile_pipes = 4;
2961                 rdev->config.si.max_cu_per_sh = 6;
2962                 rdev->config.si.max_sh_per_se = 1;
2963                 rdev->config.si.max_backends_per_se = 2;
2964                 rdev->config.si.max_texture_channel_caches = 4;
2965                 rdev->config.si.max_gprs = 256;
2966                 rdev->config.si.max_gs_threads = 16;
2967                 rdev->config.si.max_hw_contexts = 8;
2968
2969                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2970                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2971                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2972                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2973                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2974                 break;
2975         case CHIP_HAINAN:
2976                 rdev->config.si.max_shader_engines = 1;
2977                 rdev->config.si.max_tile_pipes = 4;
2978                 rdev->config.si.max_cu_per_sh = 5;
2979                 rdev->config.si.max_sh_per_se = 1;
2980                 rdev->config.si.max_backends_per_se = 1;
2981                 rdev->config.si.max_texture_channel_caches = 2;
2982                 rdev->config.si.max_gprs = 256;
2983                 rdev->config.si.max_gs_threads = 16;
2984                 rdev->config.si.max_hw_contexts = 8;
2985
2986                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2987                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2988                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2989                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2990                 gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
2991                 break;
2992         }
2993
2994         /* Initialize HDP */
2995         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2996                 WREG32((0x2c14 + j), 0x00000000);
2997                 WREG32((0x2c18 + j), 0x00000000);
2998                 WREG32((0x2c1c + j), 0x00000000);
2999                 WREG32((0x2c20 + j), 0x00000000);
3000                 WREG32((0x2c24 + j), 0x00000000);
3001         }
3002
3003         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3004
3005         evergreen_fix_pci_max_read_req_size(rdev);
3006
3007         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3008
3009         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3010         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3011
3012         rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3013         rdev->config.si.mem_max_burst_length_bytes = 256;
3014         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3015         rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3016         if (rdev->config.si.mem_row_size_in_kb > 4)
3017                 rdev->config.si.mem_row_size_in_kb = 4;
3018         /* XXX use MC settings? */
3019         rdev->config.si.shader_engine_tile_size = 32;
3020         rdev->config.si.num_gpus = 1;
3021         rdev->config.si.multi_gpu_tile_size = 64;
3022
3023         /* fix up row size */
3024         gb_addr_config &= ~ROW_SIZE_MASK;
3025         switch (rdev->config.si.mem_row_size_in_kb) {
3026         case 1:
3027         default:
3028                 gb_addr_config |= ROW_SIZE(0);
3029                 break;
3030         case 2:
3031                 gb_addr_config |= ROW_SIZE(1);
3032                 break;
3033         case 4:
3034                 gb_addr_config |= ROW_SIZE(2);
3035                 break;
3036         }
3037
3038         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3039          * not have bank info, so create a custom tiling dword.
3040          * bits 3:0   num_pipes
3041          * bits 7:4   num_banks
3042          * bits 11:8  group_size
3043          * bits 15:12 row_size
3044          */
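             /* e.g. 8 pipes -> 3, 16 banks -> 2 << 4; the group size and row
              * size fields are copied from gb_addr_config below.
              */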
3045         rdev->config.si.tile_config = 0;
3046         switch (rdev->config.si.num_tile_pipes) {
3047         case 1:
3048                 rdev->config.si.tile_config |= (0 << 0);
3049                 break;
3050         case 2:
3051                 rdev->config.si.tile_config |= (1 << 0);
3052                 break;
3053         case 4:
3054                 rdev->config.si.tile_config |= (2 << 0);
3055                 break;
3056         case 8:
3057         default:
3058                 /* XXX what about 12? */
3059                 rdev->config.si.tile_config |= (3 << 0);
3060                 break;
3061         }
3062         switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3063         case 0: /* four banks */
3064                 rdev->config.si.tile_config |= 0 << 4;
3065                 break;
3066         case 1: /* eight banks */
3067                 rdev->config.si.tile_config |= 1 << 4;
3068                 break;
3069         case 2: /* sixteen banks */
3070         default:
3071                 rdev->config.si.tile_config |= 2 << 4;
3072                 break;
3073         }
3074         rdev->config.si.tile_config |=
3075                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3076         rdev->config.si.tile_config |=
3077                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3078
3079         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3080         WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3081         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3082         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3083         WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3084         WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3085         if (rdev->has_uvd) {
3086                 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3087                 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3088                 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3089         }
3090
3091         si_tiling_mode_table_init(rdev);
3092
3093         si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3094                     rdev->config.si.max_sh_per_se,
3095                     rdev->config.si.max_backends_per_se);
3096
3097         si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3098                      rdev->config.si.max_sh_per_se,
3099                      rdev->config.si.max_cu_per_sh);
3100
3101
3102         /* set HW defaults for 3D engine */
3103         WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3104                                      ROQ_IB2_START(0x2b)));
3105         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3106
3107         sx_debug_1 = RREG32(SX_DEBUG_1);
3108         WREG32(SX_DEBUG_1, sx_debug_1);
3109
3110         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3111
3112         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3113                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3114                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3115                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3116
3117         WREG32(VGT_NUM_INSTANCES, 1);
3118
3119         WREG32(CP_PERFMON_CNTL, 0);
3120
3121         WREG32(SQ_CONFIG, 0);
3122
3123         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3124                                           FORCE_EOV_MAX_REZ_CNT(255)));
3125
3126         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3127                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3128
3129         WREG32(VGT_GS_VERTEX_REUSE, 16);
3130         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3131
3132         WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3133         WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3134         WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3135         WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3136         WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3137         WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3138         WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3139         WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3140
3141         tmp = RREG32(HDP_MISC_CNTL);
3142         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3143         WREG32(HDP_MISC_CNTL, tmp);
3144
3145         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3146         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3147
3148         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3149
3150         udelay(50);
3151 }
3152
3153 /*
3154  * GPU scratch register helper functions.
3155  */
3156 static void si_scratch_init(struct radeon_device *rdev)
3157 {
3158         int i;
3159
3160         rdev->scratch.num_reg = 7;
3161         rdev->scratch.reg_base = SCRATCH_REG0;
3162         for (i = 0; i < rdev->scratch.num_reg; i++) {
3163                 rdev->scratch.free[i] = true;
3164                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3165         }
3166 }
3167
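/*
 * Emit a fence on the GFX ring: flush the read caches over the GART,
 * then use EVENT_WRITE_EOP to write the sequence number and raise an
 * interrupt once the preceding work has completed.
 */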
3168 void si_fence_ring_emit(struct radeon_device *rdev,
3169                         struct radeon_fence *fence)
3170 {
3171         struct radeon_ring *ring = &rdev->ring[fence->ring];
3172         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3173
3174         /* flush read cache over gart */
3175         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3176         radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3177         radeon_ring_write(ring, 0);
3178         radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3179         radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3180                           PACKET3_TC_ACTION_ENA |
3181                           PACKET3_SH_KCACHE_ACTION_ENA |
3182                           PACKET3_SH_ICACHE_ACTION_ENA);
3183         radeon_ring_write(ring, 0xFFFFFFFF);
3184         radeon_ring_write(ring, 0);
3185         radeon_ring_write(ring, 10); /* poll interval */
3186         /* EVENT_WRITE_EOP - flush caches, send int */
3187         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3188         radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3189         radeon_ring_write(ring, lower_32_bits(addr));
3190         radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3191         radeon_ring_write(ring, fence->seq);
3192         radeon_ring_write(ring, 0);
3193 }
3194
3195 /*
3196  * IB stuff
3197  */
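/*
 * Schedule an indirect buffer on the ring.  Const IBs are preceded by a
 * SWITCH_BUFFER packet; for normal IBs the expected rptr is recorded
 * (via the save register or write-back) and the read caches are flushed
 * for the IB's VMID afterwards.
 */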
3198 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3199 {
3200         struct radeon_ring *ring = &rdev->ring[ib->ring];
3201         u32 header;
3202
3203         if (ib->is_const_ib) {
3204                 /* set switch buffer packet before const IB */
3205                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3206                 radeon_ring_write(ring, 0);
3207
3208                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3209         } else {
3210                 u32 next_rptr;
3211                 if (ring->rptr_save_reg) {
3212                         next_rptr = ring->wptr + 3 + 4 + 8;
3213                         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3214                         radeon_ring_write(ring, ((ring->rptr_save_reg -
3215                                                   PACKET3_SET_CONFIG_REG_START) >> 2));
3216                         radeon_ring_write(ring, next_rptr);
3217                 } else if (rdev->wb.enabled) {
3218                         next_rptr = ring->wptr + 5 + 4 + 8;
3219                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3220                         radeon_ring_write(ring, (1 << 8));
3221                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3222                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3223                         radeon_ring_write(ring, next_rptr);
3224                 }
3225
3226                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3227         }
3228
3229         radeon_ring_write(ring, header);
3230         radeon_ring_write(ring,
3231 #ifdef __BIG_ENDIAN
3232                           (2 << 0) |
3233 #endif
3234                           (ib->gpu_addr & 0xFFFFFFFC));
3235         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3236         radeon_ring_write(ring, ib->length_dw |
3237                           (ib->vm ? (ib->vm->id << 24) : 0));
3238
3239         if (!ib->is_const_ib) {
3240                 /* flush read cache over gart for this vmid */
3241                 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3242                 radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3243                 radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
3244                 radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3245                 radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3246                                   PACKET3_TC_ACTION_ENA |
3247                                   PACKET3_SH_KCACHE_ACTION_ENA |
3248                                   PACKET3_SH_ICACHE_ACTION_ENA);
3249                 radeon_ring_write(ring, 0xFFFFFFFF);
3250                 radeon_ring_write(ring, 0);
3251                 radeon_ring_write(ring, 10); /* poll interval */
3252         }
3253 }
3254
3255 /*
3256  * CP.
3257  */
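/*
 * Halt or un-halt the CP micro engines (ME/PFP/CE); when halting, also
 * mark the three CP rings as not ready.
 */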
3258 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3259 {
3260         if (enable)
3261                 WREG32(CP_ME_CNTL, 0);
3262         else {
3263                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3264                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3265                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3266                 WREG32(SCRATCH_UMSK, 0);
3267                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3268                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3269                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3270         }
3271         udelay(50);
3272 }
3273
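/*
 * Upload the PFP, CE and ME microcode by streaming the big-endian
 * firmware words into the corresponding CP ucode/RAM data registers.
 */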
3274 static int si_cp_load_microcode(struct radeon_device *rdev)
3275 {
3276         const __be32 *fw_data;
3277         int i;
3278
3279         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3280                 return -EINVAL;
3281
3282         si_cp_enable(rdev, false);
3283
3284         /* PFP */
3285         fw_data = (const __be32 *)rdev->pfp_fw->data;
3286         WREG32(CP_PFP_UCODE_ADDR, 0);
3287         for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3288                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3289         WREG32(CP_PFP_UCODE_ADDR, 0);
3290
3291         /* CE */
3292         fw_data = (const __be32 *)rdev->ce_fw->data;
3293         WREG32(CP_CE_UCODE_ADDR, 0);
3294         for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3295                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3296         WREG32(CP_CE_UCODE_ADDR, 0);
3297
3298         /* ME */
3299         fw_data = (const __be32 *)rdev->me_fw->data;
3300         WREG32(CP_ME_RAM_WADDR, 0);
3301         for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3302                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3303         WREG32(CP_ME_RAM_WADDR, 0);
3304
3305         WREG32(CP_PFP_UCODE_ADDR, 0);
3306         WREG32(CP_CE_UCODE_ADDR, 0);
3307         WREG32(CP_ME_RAM_WADDR, 0);
3308         WREG32(CP_ME_RAM_RADDR, 0);
3309         return 0;
3310 }
3311
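/*
 * Bring up the CP: send ME_INITIALIZE, set up the CE partitions, replay
 * the default context state (si_default_state) on the GFX ring and clear
 * the compute context state on the two compute rings.
 */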
3312 static int si_cp_start(struct radeon_device *rdev)
3313 {
3314         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3315         int r, i;
3316
3317         r = radeon_ring_lock(rdev, ring, 7 + 4);
3318         if (r) {
3319                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3320                 return r;
3321         }
3322         /* init the CP */
3323         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3324         radeon_ring_write(ring, 0x1);
3325         radeon_ring_write(ring, 0x0);
3326         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3327         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3328         radeon_ring_write(ring, 0);
3329         radeon_ring_write(ring, 0);
3330
3331         /* init the CE partitions */
3332         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3333         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3334         radeon_ring_write(ring, 0xc000);
3335         radeon_ring_write(ring, 0xe000);
3336         radeon_ring_unlock_commit(rdev, ring);
3337
3338         si_cp_enable(rdev, true);
3339
3340         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3341         if (r) {
3342                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3343                 return r;
3344         }
3345
3346         /* setup clear context state */
3347         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3348         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3349
3350         for (i = 0; i < si_default_size; i++)
3351                 radeon_ring_write(ring, si_default_state[i]);
3352
3353         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3354         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3355
3356         /* set clear context state */
3357         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3358         radeon_ring_write(ring, 0);
3359
3360         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3361         radeon_ring_write(ring, 0x00000316);
3362         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3363         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3364
3365         radeon_ring_unlock_commit(rdev, ring);
3366
3367         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3368                 ring = &rdev->ring[i];
3369                 r = radeon_ring_lock(rdev, ring, 2);
                     if (r) {
                             DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
                             return r;
                     }
3370
3371                 /* clear the compute context state */
3372                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3373                 radeon_ring_write(ring, 0);
3374
3375                 radeon_ring_unlock_commit(rdev, ring);
3376         }
3377
3378         return 0;
3379 }
3380
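/*
 * Tear down the three CP rings and release their rptr scratch registers.
 */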
3381 static void si_cp_fini(struct radeon_device *rdev)
3382 {
3383         struct radeon_ring *ring;
3384         si_cp_enable(rdev, false);
3385
3386         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3387         radeon_ring_fini(rdev, ring);
3388         radeon_scratch_free(rdev, ring->rptr_save_reg);
3389
3390         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3391         radeon_ring_fini(rdev, ring);
3392         radeon_scratch_free(rdev, ring->rptr_save_reg);
3393
3394         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3395         radeon_ring_fini(rdev, ring);
3396         radeon_scratch_free(rdev, ring->rptr_save_reg);
3397 }
3398
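/*
 * Program the three CP ring buffers (size, rptr write-back address,
 * base) and start them, then run a ring test on each.
 */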
3399 static int si_cp_resume(struct radeon_device *rdev)
3400 {
3401         struct radeon_ring *ring;
3402         u32 tmp;
3403         u32 rb_bufsz;
3404         int r;
3405
3406         si_enable_gui_idle_interrupt(rdev, false);
3407
3408         WREG32(CP_SEM_WAIT_TIMER, 0x0);
3409         WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3410
3411         /* Set the write pointer delay */
3412         WREG32(CP_RB_WPTR_DELAY, 0);
3413
3414         WREG32(CP_DEBUG, 0);
3415         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3416
3417         /* ring 0 - compute and gfx */
3418         /* Set ring buffer size */
3419         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3420         rb_bufsz = order_base_2(ring->ring_size / 8);
3421         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3422 #ifdef __BIG_ENDIAN
3423         tmp |= BUF_SWAP_32BIT;
3424 #endif
3425         WREG32(CP_RB0_CNTL, tmp);
3426
3427         /* Initialize the ring buffer's read and write pointers */
3428         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3429         ring->wptr = 0;
3430         WREG32(CP_RB0_WPTR, ring->wptr);
3431
3432         /* set the wb address whether it's enabled or not */
3433         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3434         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3435
3436         if (rdev->wb.enabled)
3437                 WREG32(SCRATCH_UMSK, 0xff);
3438         else {
3439                 tmp |= RB_NO_UPDATE;
3440                 WREG32(SCRATCH_UMSK, 0);
3441         }
3442
3443         mdelay(1);
3444         WREG32(CP_RB0_CNTL, tmp);
3445
3446         WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3447
3448         /* ring1  - compute only */
3449         /* Set ring buffer size */
3450         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3451         rb_bufsz = order_base_2(ring->ring_size / 8);
3452         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3453 #ifdef __BIG_ENDIAN
3454         tmp |= BUF_SWAP_32BIT;
3455 #endif
3456         WREG32(CP_RB1_CNTL, tmp);
3457
3458         /* Initialize the ring buffer's read and write pointers */
3459         WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3460         ring->wptr = 0;
3461         WREG32(CP_RB1_WPTR, ring->wptr);
3462
3463         /* set the wb address whether it's enabled or not */
3464         WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3465         WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3466
3467         mdelay(1);
3468         WREG32(CP_RB1_CNTL, tmp);
3469
3470         WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3471
3472         /* ring2 - compute only */
3473         /* Set ring buffer size */
3474         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3475         rb_bufsz = order_base_2(ring->ring_size / 8);
3476         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3477 #ifdef __BIG_ENDIAN
3478         tmp |= BUF_SWAP_32BIT;
3479 #endif
3480         WREG32(CP_RB2_CNTL, tmp);
3481
3482         /* Initialize the ring buffer's read and write pointers */
3483         WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3484         ring->wptr = 0;
3485         WREG32(CP_RB2_WPTR, ring->wptr);
3486
3487         /* set the wb address whether it's enabled or not */
3488         WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3489         WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3490
3491         mdelay(1);
3492         WREG32(CP_RB2_CNTL, tmp);
3493
3494         WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3495
3496         /* start the rings */
3497         si_cp_start(rdev);
3498         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3499         rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3500         rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3501         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3502         if (r) {
3503                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3504                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3505                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3506                 return r;
3507         }
3508         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3509         if (r) {
3510                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3511         }
3512         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3513         if (r) {
3514                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3515         }
3516
3517         si_enable_gui_idle_interrupt(rdev, true);
3518
3519         if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3520                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3521
3522         return 0;
3523 }
3524
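/*
 * Inspect the GRBM/SRBM/DMA/VM status registers and return a mask of the
 * blocks that appear hung and need a soft reset.
 */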
3525 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3526 {
3527         u32 reset_mask = 0;
3528         u32 tmp;
3529
3530         /* GRBM_STATUS */
3531         tmp = RREG32(GRBM_STATUS);
3532         if (tmp & (PA_BUSY | SC_BUSY |
3533                    BCI_BUSY | SX_BUSY |
3534                    TA_BUSY | VGT_BUSY |
3535                    DB_BUSY | CB_BUSY |
3536                    GDS_BUSY | SPI_BUSY |
3537                    IA_BUSY | IA_BUSY_NO_DMA))
3538                 reset_mask |= RADEON_RESET_GFX;
3539
3540         if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3541                    CP_BUSY | CP_COHERENCY_BUSY))
3542                 reset_mask |= RADEON_RESET_CP;
3543
3544         if (tmp & GRBM_EE_BUSY)
3545                 reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3546
3547         /* GRBM_STATUS2 */
3548         tmp = RREG32(GRBM_STATUS2);
3549         if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3550                 reset_mask |= RADEON_RESET_RLC;
3551
3552         /* DMA_STATUS_REG 0 */
3553         tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3554         if (!(tmp & DMA_IDLE))
3555                 reset_mask |= RADEON_RESET_DMA;
3556
3557         /* DMA_STATUS_REG 1 */
3558         tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3559         if (!(tmp & DMA_IDLE))
3560                 reset_mask |= RADEON_RESET_DMA1;
3561
3562         /* SRBM_STATUS2 */
3563         tmp = RREG32(SRBM_STATUS2);
3564         if (tmp & DMA_BUSY)
3565                 reset_mask |= RADEON_RESET_DMA;
3566
3567         if (tmp & DMA1_BUSY)
3568                 reset_mask |= RADEON_RESET_DMA1;
3569
3570         /* SRBM_STATUS */
3571         tmp = RREG32(SRBM_STATUS);
3572
3573         if (tmp & IH_BUSY)
3574                 reset_mask |= RADEON_RESET_IH;
3575
3576         if (tmp & SEM_BUSY)
3577                 reset_mask |= RADEON_RESET_SEM;
3578
3579         if (tmp & GRBM_RQ_PENDING)
3580                 reset_mask |= RADEON_RESET_GRBM;
3581
3582         if (tmp & VMC_BUSY)
3583                 reset_mask |= RADEON_RESET_VMC;
3584
3585         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3586                    MCC_BUSY | MCD_BUSY))
3587                 reset_mask |= RADEON_RESET_MC;
3588
3589         if (evergreen_is_display_hung(rdev))
3590                 reset_mask |= RADEON_RESET_DISPLAY;
3591
3592         /* VM_L2_STATUS */
3593         tmp = RREG32(VM_L2_STATUS);
3594         if (tmp & L2_BUSY)
3595                 reset_mask |= RADEON_RESET_VMC;
3596
3597         /* Skip MC reset as it's most likely not hung, just busy */
3598         if (reset_mask & RADEON_RESET_MC) {
3599                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3600                 reset_mask &= ~RADEON_RESET_MC;
3601         }
3602
3603         return reset_mask;
3604 }
3605
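/**
 * si_gpu_soft_reset - soft reset GPU blocks
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of blocks to reset (from si_gpu_check_soft_reset())
 *
 * Stop the CP, RLC and DMA engines, save the MC state and then
 * soft reset the requested blocks via GRBM_SOFT_RESET and
 * SRBM_SOFT_RESET before restoring the MC (SI).
 */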
3606 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3607 {
3608         struct evergreen_mc_save save;
3609         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3610         u32 tmp;
3611
3612         if (reset_mask == 0)
3613                 return;
3614
3615         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3616
3617         evergreen_print_gpu_status_regs(rdev);
3618         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3619                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3620         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3621                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3622
3623         /* disable PG/CG */
3624         si_fini_pg(rdev);
3625         si_fini_cg(rdev);
3626
3627         /* stop the rlc */
3628         si_rlc_stop(rdev);
3629
3630         /* Disable CP parsing/prefetching */
3631         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3632
3633         if (reset_mask & RADEON_RESET_DMA) {
3634                 /* dma0 */
3635                 tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3636                 tmp &= ~DMA_RB_ENABLE;
3637                 WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3638         }
3639         if (reset_mask & RADEON_RESET_DMA1) {
3640                 /* dma1 */
3641                 tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3642                 tmp &= ~DMA_RB_ENABLE;
3643                 WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3644         }
3645
3646         udelay(50);
3647
3648         evergreen_mc_stop(rdev, &save);
3649         if (evergreen_mc_wait_for_idle(rdev)) {
3650                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3651         }
3652
3653         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3654                 grbm_soft_reset = SOFT_RESET_CB |
3655                         SOFT_RESET_DB |
3656                         SOFT_RESET_GDS |
3657                         SOFT_RESET_PA |
3658                         SOFT_RESET_SC |
3659                         SOFT_RESET_BCI |
3660                         SOFT_RESET_SPI |
3661                         SOFT_RESET_SX |
3662                         SOFT_RESET_TC |
3663                         SOFT_RESET_TA |
3664                         SOFT_RESET_VGT |
3665                         SOFT_RESET_IA;
3666         }
3667
3668         if (reset_mask & RADEON_RESET_CP) {
3669                 grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3670
3671                 srbm_soft_reset |= SOFT_RESET_GRBM;
3672         }
3673
3674         if (reset_mask & RADEON_RESET_DMA)
3675                 srbm_soft_reset |= SOFT_RESET_DMA;
3676
3677         if (reset_mask & RADEON_RESET_DMA1)
3678                 srbm_soft_reset |= SOFT_RESET_DMA1;
3679
3680         if (reset_mask & RADEON_RESET_DISPLAY)
3681                 srbm_soft_reset |= SOFT_RESET_DC;
3682
3683         if (reset_mask & RADEON_RESET_RLC)
3684                 grbm_soft_reset |= SOFT_RESET_RLC;
3685
3686         if (reset_mask & RADEON_RESET_SEM)
3687                 srbm_soft_reset |= SOFT_RESET_SEM;
3688
3689         if (reset_mask & RADEON_RESET_IH)
3690                 srbm_soft_reset |= SOFT_RESET_IH;
3691
3692         if (reset_mask & RADEON_RESET_GRBM)
3693                 srbm_soft_reset |= SOFT_RESET_GRBM;
3694
3695         if (reset_mask & RADEON_RESET_VMC)
3696                 srbm_soft_reset |= SOFT_RESET_VMC;
3697
3698         if (reset_mask & RADEON_RESET_MC)
3699                 srbm_soft_reset |= SOFT_RESET_MC;
3700
3701         if (grbm_soft_reset) {
3702                 tmp = RREG32(GRBM_SOFT_RESET);
3703                 tmp |= grbm_soft_reset;
3704                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3705                 WREG32(GRBM_SOFT_RESET, tmp);
3706                 tmp = RREG32(GRBM_SOFT_RESET);
3707
3708                 udelay(50);
3709
3710                 tmp &= ~grbm_soft_reset;
3711                 WREG32(GRBM_SOFT_RESET, tmp);
3712                 tmp = RREG32(GRBM_SOFT_RESET);
3713         }
3714
3715         if (srbm_soft_reset) {
3716                 tmp = RREG32(SRBM_SOFT_RESET);
3717                 tmp |= srbm_soft_reset;
3718                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3719                 WREG32(SRBM_SOFT_RESET, tmp);
3720                 tmp = RREG32(SRBM_SOFT_RESET);
3721
3722                 udelay(50);
3723
3724                 tmp &= ~srbm_soft_reset;
3725                 WREG32(SRBM_SOFT_RESET, tmp);
3726                 tmp = RREG32(SRBM_SOFT_RESET);
3727         }
3728
3729         /* Wait a little for things to settle down */
3730         udelay(50);
3731
3732         evergreen_mc_resume(rdev, &save);
3733         udelay(50);
3734
3735         evergreen_print_gpu_status_regs(rdev);
3736 }
3737
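/**
 * si_set_clk_bypass_mode - put sclk/mclk into bypass
 *
 * @rdev: radeon_device pointer
 *
 * Switch the engine and memory clocks to bypass mode in
 * preparation for a PCI config space reset (SI).
 */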
3738 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3739 {
3740         u32 tmp, i;
3741
3742         tmp = RREG32(CG_SPLL_FUNC_CNTL);
3743         tmp |= SPLL_BYPASS_EN;
3744         WREG32(CG_SPLL_FUNC_CNTL, tmp);
3745
3746         tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3747         tmp |= SPLL_CTLREQ_CHG;
3748         WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3749
3750         for (i = 0; i < rdev->usec_timeout; i++) {
3751                 if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
3752                         break;
3753                 udelay(1);
3754         }
3755
3756         tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3757         tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
3758         WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3759
3760         tmp = RREG32(MPLL_CNTL_MODE);
3761         tmp &= ~MPLL_MCLK_SEL;
3762         WREG32(MPLL_CNTL_MODE, tmp);
3763 }
3764
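/**
 * si_spll_powerdown - power down the SPLL
 *
 * @rdev: radeon_device pointer
 *
 * Take software control of the SPLL and put it into reset and
 * sleep in preparation for a PCI config space reset (SI).
 */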
3765 static void si_spll_powerdown(struct radeon_device *rdev)
3766 {
3767         u32 tmp;
3768
3769         tmp = RREG32(SPLL_CNTL_MODE);
3770         tmp |= SPLL_SW_DIR_CONTROL;
3771         WREG32(SPLL_CNTL_MODE, tmp);
3772
3773         tmp = RREG32(CG_SPLL_FUNC_CNTL);
3774         tmp |= SPLL_RESET;
3775         WREG32(CG_SPLL_FUNC_CNTL, tmp);
3776
3777         tmp = RREG32(CG_SPLL_FUNC_CNTL);
3778         tmp |= SPLL_SLEEP;
3779         WREG32(CG_SPLL_FUNC_CNTL, tmp);
3780
3781         tmp = RREG32(SPLL_CNTL_MODE);
3782         tmp &= ~SPLL_SW_DIR_CONTROL;
3783         WREG32(SPLL_CNTL_MODE, tmp);
3784 }
3785
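/**
 * si_gpu_pci_config_reset - reset the asic via the PCI config space
 *
 * @rdev: radeon_device pointer
 *
 * Stop the engines, save the MC state, switch the clocks to bypass,
 * power down the SPLL, disable bus mastering and issue a PCI config
 * space reset, then wait for the ASIC to come back (SI).
 */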
3786 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
3787 {
3788         struct evergreen_mc_save save;
3789         u32 tmp, i;
3790
3791         dev_info(rdev->dev, "GPU pci config reset\n");
3792
3793         /* disable dpm? */
3794
3795         /* disable cg/pg */
3796         si_fini_pg(rdev);
3797         si_fini_cg(rdev);
3798
3799         /* Disable CP parsing/prefetching */
3800         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3801         /* dma0 */
3802         tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3803         tmp &= ~DMA_RB_ENABLE;
3804         WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3805         /* dma1 */
3806         tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3807         tmp &= ~DMA_RB_ENABLE;
3808         WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3809         /* XXX other engines? */
3810
3811         /* halt the rlc, disable cp internal ints */
3812         si_rlc_stop(rdev);
3813
3814         udelay(50);
3815
3816         /* disable mem access */
3817         evergreen_mc_stop(rdev, &save);
3818         if (evergreen_mc_wait_for_idle(rdev)) {
3819                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3820         }
3821
3822         /* set mclk/sclk to bypass */
3823         si_set_clk_bypass_mode(rdev);
3824         /* powerdown spll */
3825         si_spll_powerdown(rdev);
3826         /* disable BM */
3827         pci_clear_master(rdev->pdev);
3828         /* reset */
3829         radeon_pci_config_reset(rdev);
3830         /* wait for asic to come out of reset */
3831         for (i = 0; i < rdev->usec_timeout; i++) {
3832                 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
3833                         break;
3834                 udelay(1);
3835         }
3836 }
3837
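/**
 * si_asic_reset - reset the asic
 *
 * @rdev: radeon_device pointer
 *
 * Try a soft reset of the blocks that appear hung; if some blocks
 * are still busy afterwards and radeon_hard_reset is set, fall
 * back to a PCI config space reset (SI).
 * Always returns 0.
 */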
3838 int si_asic_reset(struct radeon_device *rdev)
3839 {
3840         u32 reset_mask;
3841
3842         reset_mask = si_gpu_check_soft_reset(rdev);
3843
3844         if (reset_mask)
3845                 r600_set_bios_scratch_engine_hung(rdev, true);
3846
3847         /* try soft reset */
3848         si_gpu_soft_reset(rdev, reset_mask);
3849
3850         reset_mask = si_gpu_check_soft_reset(rdev);
3851
3852         /* try pci config reset */
3853         if (reset_mask && radeon_hard_reset)
3854                 si_gpu_pci_config_reset(rdev);
3855
3856         reset_mask = si_gpu_check_soft_reset(rdev);
3857
3858         if (!reset_mask)
3859                 r600_set_bios_scratch_engine_hung(rdev, false);
3860
3861         return 0;
3862 }
3863
3864 /**
3865  * si_gfx_is_lockup - Check if the GFX engine is locked up
3866  *
3867  * @rdev: radeon_device pointer
3868  * @ring: radeon_ring structure holding ring information
3869  *
3870  * Check if the GFX engine is locked up.
3871  * Returns true if the engine appears to be locked up, false if not.
3872  */
3873 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3874 {
3875         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3876
3877         if (!(reset_mask & (RADEON_RESET_GFX |
3878                             RADEON_RESET_COMPUTE |
3879                             RADEON_RESET_CP))) {
3880                 radeon_ring_lockup_update(rdev, ring);
3881                 return false;
3882         }
3883         return radeon_ring_test_lockup(rdev, ring);
3884 }
3885
3886 /* MC */
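/**
 * si_mc_program - program the memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the HDP registers and program the memory controller's
 * system and frame buffer apertures while memory clients are
 * stopped (SI).
 */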
3887 static void si_mc_program(struct radeon_device *rdev)
3888 {
3889         struct evergreen_mc_save save;
3890         u32 tmp;
3891         int i, j;
3892
3893         /* Initialize HDP */
3894         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3895                 WREG32((0x2c14 + j), 0x00000000);
3896                 WREG32((0x2c18 + j), 0x00000000);
3897                 WREG32((0x2c1c + j), 0x00000000);
3898                 WREG32((0x2c20 + j), 0x00000000);
3899                 WREG32((0x2c24 + j), 0x00000000);
3900         }
3901         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
3902
3903         evergreen_mc_stop(rdev, &save);
3904         if (radeon_mc_wait_for_idle(rdev)) {
3905                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3906         }
3907         if (!ASIC_IS_NODCE(rdev))
3908                 /* Lock out access through the VGA aperture */
3909                 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
3910         /* Update configuration */
3911         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
3912                rdev->mc.vram_start >> 12);
3913         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
3914                rdev->mc.vram_end >> 12);
3915         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
3916                rdev->vram_scratch.gpu_addr >> 12);
3917         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
3918         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
3919         WREG32(MC_VM_FB_LOCATION, tmp);
3920         /* XXX double check these! */
3921         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
3922         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
3923         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
3924         WREG32(MC_VM_AGP_BASE, 0);
3925         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
3926         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
3927         if (radeon_mc_wait_for_idle(rdev)) {
3928                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3929         }
3930         evergreen_mc_resume(rdev, &save);
3931         if (!ASIC_IS_NODCE(rdev)) {
3932                 /* we need to own VRAM, so turn off the VGA renderer here
3933                  * to stop it overwriting our objects */
3934                 rv515_vga_render_disable(rdev);
3935         }
3936 }
3937
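/**
 * si_vram_gtt_location - determine VRAM and GTT placement
 *
 * @rdev: radeon_device pointer
 * @mc: memory controller structure holding memory information
 *
 * Place VRAM and GTT in the GPU's address space, limiting VRAM
 * so that there is room left for the GTT (SI).
 */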
3938 void si_vram_gtt_location(struct radeon_device *rdev,
3939                           struct radeon_mc *mc)
3940 {
3941         if (mc->mc_vram_size > 0xFFC0000000ULL) {
3942                 /* leave room for at least 1024M GTT */
3943                 dev_warn(rdev->dev, "limiting VRAM\n");
3944                 mc->real_vram_size = 0xFFC0000000ULL;
3945                 mc->mc_vram_size = 0xFFC0000000ULL;
3946         }
3947         radeon_vram_location(rdev, &rdev->mc, 0);
3948         rdev->mc.gtt_base_align = 0;
3949         radeon_gtt_location(rdev, mc);
3950 }
3951
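/**
 * si_mc_init - initialize the memory controller driver state
 *
 * @rdev: radeon_device pointer
 *
 * Look up the amount of VRAM, the memory bus width and the
 * apertures and fill in the radeon_mc structure accordingly (SI).
 * Returns 0 for success.
 */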
3952 static int si_mc_init(struct radeon_device *rdev)
3953 {
3954         u32 tmp;
3955         int chansize, numchan;
3956
3957         /* Get VRAM information */
3958         rdev->mc.vram_is_ddr = true;
3959         tmp = RREG32(MC_ARB_RAMCFG);
3960         if (tmp & CHANSIZE_OVERRIDE) {
3961                 chansize = 16;
3962         } else if (tmp & CHANSIZE_MASK) {
3963                 chansize = 64;
3964         } else {
3965                 chansize = 32;
3966         }
3967         tmp = RREG32(MC_SHARED_CHMAP);
3968         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3969         case 0:
3970         default:
3971                 numchan = 1;
3972                 break;
3973         case 1:
3974                 numchan = 2;
3975                 break;
3976         case 2:
3977                 numchan = 4;
3978                 break;
3979         case 3:
3980                 numchan = 8;
3981                 break;
3982         case 4:
3983                 numchan = 3;
3984                 break;
3985         case 5:
3986                 numchan = 6;
3987                 break;
3988         case 6:
3989                 numchan = 10;
3990                 break;
3991         case 7:
3992                 numchan = 12;
3993                 break;
3994         case 8:
3995                 numchan = 16;
3996                 break;
3997         }
3998         rdev->mc.vram_width = numchan * chansize;
3999         /* Could aperture size report 0? */
4000         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4001         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4002         /* size in MB on si */
4003         tmp = RREG32(CONFIG_MEMSIZE);
4004         /* some boards may have garbage in the upper 16 bits */
4005         if (tmp & 0xffff0000) {
4006                 DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4007                 if (tmp & 0xffff)
4008                         tmp &= 0xffff;
4009         }
4010         rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4011         rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4012         rdev->mc.visible_vram_size = rdev->mc.aper_size;
4013         si_vram_gtt_location(rdev, &rdev->mc);
4014         radeon_update_bandwidth_info(rdev);
4015
4016         return 0;
4017 }
4018
4019 /*
4020  * GART
4021  */
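/**
 * si_pcie_gart_tlb_flush - flush the GART TLB
 *
 * @rdev: radeon_device pointer
 *
 * Flush the HDP cache and invalidate the VM context 0 (GART)
 * TLB entries (SI).
 */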
4022 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4023 {
4024         /* flush hdp cache */
4025         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4026
4027         /* bits 0-15 are the VM contexts 0-15 */
4028         WREG32(VM_INVALIDATE_REQUEST, 1);
4029 }
4030
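/**
 * si_pcie_gart_enable - set up and enable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Pin the GART page table in VRAM, program the TLB and L2 cache
 * control registers, set up VM context 0 for the GART and contexts
 * 1-15 for the VMs, then flush the TLB (SI).
 * Returns 0 for success, error for failure.
 */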
4031 static int si_pcie_gart_enable(struct radeon_device *rdev)
4032 {
4033         int r, i;
4034
4035         if (rdev->gart.robj == NULL) {
4036                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4037                 return -EINVAL;
4038         }
4039         r = radeon_gart_table_vram_pin(rdev);
4040         if (r)
4041                 return r;
4042         radeon_gart_restore(rdev);
4043         /* Setup TLB control */
4044         WREG32(MC_VM_MX_L1_TLB_CNTL,
4045                (0xA << 7) |
4046                ENABLE_L1_TLB |
4047                ENABLE_L1_FRAGMENT_PROCESSING |
4048                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4049                ENABLE_ADVANCED_DRIVER_MODEL |
4050                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4051         /* Setup L2 cache */
4052         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4053                ENABLE_L2_FRAGMENT_PROCESSING |
4054                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4055                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4056                EFFECTIVE_L2_QUEUE_SIZE(7) |
4057                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4058         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4059         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4060                BANK_SELECT(4) |
4061                L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4062         /* setup context0 */
4063         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4064         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4065         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4066         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4067                         (u32)(rdev->dummy_page.addr >> 12));
4068         WREG32(VM_CONTEXT0_CNTL2, 0);
4069         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4070                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4071
4072         WREG32(0x15D4, 0);
4073         WREG32(0x15D8, 0);
4074         WREG32(0x15DC, 0);
4075
4076         /* empty context1-15 */
4077         /* set vm size, must be a multiple of 4 */
4078         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4079         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4080         /* Assign the pt base to something valid for now; the pts used for
4081          * the VMs are determined by the application and are set up and assigned
4082          * on the fly in the vm part of radeon_gart.c
4083          */
4084         for (i = 1; i < 16; i++) {
4085                 if (i < 8)
4086                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4087                                rdev->gart.table_addr >> 12);
4088                 else
4089                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4090                                rdev->gart.table_addr >> 12);
4091         }
4092
4093         /* enable context1-15 */
4094         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4095                (u32)(rdev->dummy_page.addr >> 12));
4096         WREG32(VM_CONTEXT1_CNTL2, 4);
4097         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4098                                 PAGE_TABLE_BLOCK_SIZE(RADEON_VM_BLOCK_SIZE - 9) |
4099                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4100                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4101                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4102                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4103                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4104                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4105                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4106                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4107                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4108                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4109                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4110                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4111
4112         si_pcie_gart_tlb_flush(rdev);
4113         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4114                  (unsigned)(rdev->mc.gtt_size >> 20),
4115                  (unsigned long long)rdev->gart.table_addr);
4116         rdev->gart.ready = true;
4117         return 0;
4118 }
4119
4120 static void si_pcie_gart_disable(struct radeon_device *rdev)
4121 {
4122         /* Disable all tables */
4123         WREG32(VM_CONTEXT0_CNTL, 0);
4124         WREG32(VM_CONTEXT1_CNTL, 0);
4125         /* Setup TLB control */
4126         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4127                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4128         /* Setup L2 cache */
4129         WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4130                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4131                EFFECTIVE_L2_QUEUE_SIZE(7) |
4132                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4133         WREG32(VM_L2_CNTL2, 0);
4134         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4135                L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4136         radeon_gart_table_vram_unpin(rdev);
4137 }
4138
4139 static void si_pcie_gart_fini(struct radeon_device *rdev)
4140 {
4141         si_pcie_gart_disable(rdev);
4142         radeon_gart_table_vram_free(rdev);
4143         radeon_gart_fini(rdev);
4144 }
4145
4146 /* vm parser */
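/**
 * si_vm_reg_valid - check if a register can be written from a VM
 *
 * @reg: register offset to check
 *
 * Check whether a register may be written by a command stream
 * running in a VM; context registers and a whitelist of config
 * registers are allowed (SI).
 * Returns true if the register access is allowed, false otherwise.
 */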
4147 static bool si_vm_reg_valid(u32 reg)
4148 {
4149         /* context regs are fine */
4150         if (reg >= 0x28000)
4151                 return true;
4152
4153         /* check config regs */
4154         switch (reg) {
4155         case GRBM_GFX_INDEX:
4156         case CP_STRMOUT_CNTL:
4157         case VGT_VTX_VECT_EJECT_REG:
4158         case VGT_CACHE_INVALIDATION:
4159         case VGT_ESGS_RING_SIZE:
4160         case VGT_GSVS_RING_SIZE:
4161         case VGT_GS_VERTEX_REUSE:
4162         case VGT_PRIMITIVE_TYPE:
4163         case VGT_INDEX_TYPE:
4164         case VGT_NUM_INDICES:
4165         case VGT_NUM_INSTANCES:
4166         case VGT_TF_RING_SIZE:
4167         case VGT_HS_OFFCHIP_PARAM:
4168         case VGT_TF_MEMORY_BASE:
4169         case PA_CL_ENHANCE:
4170         case PA_SU_LINE_STIPPLE_VALUE:
4171         case PA_SC_LINE_STIPPLE_STATE:
4172         case PA_SC_ENHANCE:
4173         case SQC_CACHES:
4174         case SPI_STATIC_THREAD_MGMT_1:
4175         case SPI_STATIC_THREAD_MGMT_2:
4176         case SPI_STATIC_THREAD_MGMT_3:
4177         case SPI_PS_MAX_WAVE_ID:
4178         case SPI_CONFIG_CNTL:
4179         case SPI_CONFIG_CNTL_1:
4180         case TA_CNTL_AUX:
4181                 return true;
4182         default:
4183                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4184                 return false;
4185         }
4186 }
4187
4188 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4189                                   u32 *ib, struct radeon_cs_packet *pkt)
4190 {
4191         switch (pkt->opcode) {
4192         case PACKET3_NOP:
4193         case PACKET3_SET_BASE:
4194         case PACKET3_SET_CE_DE_COUNTERS:
4195         case PACKET3_LOAD_CONST_RAM:
4196         case PACKET3_WRITE_CONST_RAM:
4197         case PACKET3_WRITE_CONST_RAM_OFFSET:
4198         case PACKET3_DUMP_CONST_RAM:
4199         case PACKET3_INCREMENT_CE_COUNTER:
4200         case PACKET3_WAIT_ON_DE_COUNTER:
4201         case PACKET3_CE_WRITE:
4202                 break;
4203         default:
4204                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4205                 return -EINVAL;
4206         }
4207         return 0;
4208 }
4209
4210 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4211 {
4212         u32 start_reg, reg, i;
4213         u32 command = ib[idx + 4];
4214         u32 info = ib[idx + 1];
4215         u32 idx_value = ib[idx];
4216         if (command & PACKET3_CP_DMA_CMD_SAS) {
4217                 /* src address space is register */
4218                 if (((info & 0x60000000) >> 29) == 0) {
4219                         start_reg = idx_value << 2;
4220                         if (command & PACKET3_CP_DMA_CMD_SAIC) {
4221                                 reg = start_reg;
4222                                 if (!si_vm_reg_valid(reg)) {
4223                                         DRM_ERROR("CP DMA Bad SRC register\n");
4224                                         return -EINVAL;
4225                                 }
4226                         } else {
4227                                 for (i = 0; i < (command & 0x1fffff); i++) {
4228                                         reg = start_reg + (4 * i);
4229                                         if (!si_vm_reg_valid(reg)) {
4230                                                 DRM_ERROR("CP DMA Bad SRC register\n");
4231                                                 return -EINVAL;
4232                                         }
4233                                 }
4234                         }
4235                 }
4236         }
4237         if (command & PACKET3_CP_DMA_CMD_DAS) {
4238                 /* dst address space is register */
4239                 if (((info & 0x00300000) >> 20) == 0) {
4240                         start_reg = ib[idx + 2];
4241                         if (command & PACKET3_CP_DMA_CMD_DAIC) {
4242                                 reg = start_reg;
4243                                 if (!si_vm_reg_valid(reg)) {
4244                                         DRM_ERROR("CP DMA Bad DST register\n");
4245                                         return -EINVAL;
4246                                 }
4247                         } else {
4248                                 for (i = 0; i < (command & 0x1fffff); i++) {
4249                                         reg = start_reg + (4 * i);
4250                                         if (!si_vm_reg_valid(reg)) {
4251                                                 DRM_ERROR("CP DMA Bad DST register\n");
4252                                                 return -EINVAL;
4253                                         }
4254                                 }
4255                         }
4256                 }
4257         }
4258         return 0;
4259 }
4260
4261 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4262                                    u32 *ib, struct radeon_cs_packet *pkt)
4263 {
4264         int r;
4265         u32 idx = pkt->idx + 1;
4266         u32 idx_value = ib[idx];
4267         u32 start_reg, end_reg, reg, i;
4268
4269         switch (pkt->opcode) {
4270         case PACKET3_NOP:
4271         case PACKET3_SET_BASE:
4272         case PACKET3_CLEAR_STATE:
4273         case PACKET3_INDEX_BUFFER_SIZE:
4274         case PACKET3_DISPATCH_DIRECT:
4275         case PACKET3_DISPATCH_INDIRECT:
4276         case PACKET3_ALLOC_GDS:
4277         case PACKET3_WRITE_GDS_RAM:
4278         case PACKET3_ATOMIC_GDS:
4279         case PACKET3_ATOMIC:
4280         case PACKET3_OCCLUSION_QUERY:
4281         case PACKET3_SET_PREDICATION:
4282         case PACKET3_COND_EXEC:
4283         case PACKET3_PRED_EXEC:
4284         case PACKET3_DRAW_INDIRECT:
4285         case PACKET3_DRAW_INDEX_INDIRECT:
4286         case PACKET3_INDEX_BASE:
4287         case PACKET3_DRAW_INDEX_2:
4288         case PACKET3_CONTEXT_CONTROL:
4289         case PACKET3_INDEX_TYPE:
4290         case PACKET3_DRAW_INDIRECT_MULTI:
4291         case PACKET3_DRAW_INDEX_AUTO:
4292         case PACKET3_DRAW_INDEX_IMMD:
4293         case PACKET3_NUM_INSTANCES:
4294         case PACKET3_DRAW_INDEX_MULTI_AUTO:
4295         case PACKET3_STRMOUT_BUFFER_UPDATE:
4296         case PACKET3_DRAW_INDEX_OFFSET_2:
4297         case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4298         case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4299         case PACKET3_MPEG_INDEX:
4300         case PACKET3_WAIT_REG_MEM:
4301         case PACKET3_MEM_WRITE:
4302         case PACKET3_PFP_SYNC_ME:
4303         case PACKET3_SURFACE_SYNC:
4304         case PACKET3_EVENT_WRITE:
4305         case PACKET3_EVENT_WRITE_EOP:
4306         case PACKET3_EVENT_WRITE_EOS:
4307         case PACKET3_SET_CONTEXT_REG:
4308         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4309         case PACKET3_SET_SH_REG:
4310         case PACKET3_SET_SH_REG_OFFSET:
4311         case PACKET3_INCREMENT_DE_COUNTER:
4312         case PACKET3_WAIT_ON_CE_COUNTER:
4313         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4314         case PACKET3_ME_WRITE:
4315                 break;
4316         case PACKET3_COPY_DATA:
4317                 if ((idx_value & 0xf00) == 0) {
4318                         reg = ib[idx + 3] * 4;
4319                         if (!si_vm_reg_valid(reg))
4320                                 return -EINVAL;
4321                 }
4322                 break;
4323         case PACKET3_WRITE_DATA:
4324                 if ((idx_value & 0xf00) == 0) {
4325                         start_reg = ib[idx + 1] * 4;
4326                         if (idx_value & 0x10000) {
4327                                 if (!si_vm_reg_valid(start_reg))
4328                                         return -EINVAL;
4329                         } else {
4330                                 for (i = 0; i < (pkt->count - 2); i++) {
4331                                         reg = start_reg + (4 * i);
4332                                         if (!si_vm_reg_valid(reg))
4333                                                 return -EINVAL;
4334                                 }
4335                         }
4336                 }
4337                 break;
4338         case PACKET3_COND_WRITE:
4339                 if (idx_value & 0x100) {
4340                         reg = ib[idx + 5] * 4;
4341                         if (!si_vm_reg_valid(reg))
4342                                 return -EINVAL;
4343                 }
4344                 break;
4345         case PACKET3_COPY_DW:
4346                 if (idx_value & 0x2) {
4347                         reg = ib[idx + 3] * 4;
4348                         if (!si_vm_reg_valid(reg))
4349                                 return -EINVAL;
4350                 }
4351                 break;
4352         case PACKET3_SET_CONFIG_REG:
4353                 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4354                 end_reg = 4 * pkt->count + start_reg - 4;
4355                 if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4356                     (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4357                     (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4358                         DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4359                         return -EINVAL;
4360                 }
4361                 for (i = 0; i < pkt->count; i++) {
4362                         reg = start_reg + (4 * i);
4363                         if (!si_vm_reg_valid(reg))
4364                                 return -EINVAL;
4365                 }
4366                 break;
4367         case PACKET3_CP_DMA:
4368                 r = si_vm_packet3_cp_dma_check(ib, idx);
4369                 if (r)
4370                         return r;
4371                 break;
4372         default:
4373                 DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4374                 return -EINVAL;
4375         }
4376         return 0;
4377 }
4378
4379 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4380                                        u32 *ib, struct radeon_cs_packet *pkt)
4381 {
4382         int r;
4383         u32 idx = pkt->idx + 1;
4384         u32 idx_value = ib[idx];
4385         u32 start_reg, reg, i;
4386
4387         switch (pkt->opcode) {
4388         case PACKET3_NOP:
4389         case PACKET3_SET_BASE:
4390         case PACKET3_CLEAR_STATE:
4391         case PACKET3_DISPATCH_DIRECT:
4392         case PACKET3_DISPATCH_INDIRECT:
4393         case PACKET3_ALLOC_GDS:
4394         case PACKET3_WRITE_GDS_RAM:
4395         case PACKET3_ATOMIC_GDS:
4396         case PACKET3_ATOMIC:
4397         case PACKET3_OCCLUSION_QUERY:
4398         case PACKET3_SET_PREDICATION:
4399         case PACKET3_COND_EXEC:
4400         case PACKET3_PRED_EXEC:
4401         case PACKET3_CONTEXT_CONTROL:
4402         case PACKET3_STRMOUT_BUFFER_UPDATE:
4403         case PACKET3_WAIT_REG_MEM:
4404         case PACKET3_MEM_WRITE:
4405         case PACKET3_PFP_SYNC_ME:
4406         case PACKET3_SURFACE_SYNC:
4407         case PACKET3_EVENT_WRITE:
4408         case PACKET3_EVENT_WRITE_EOP:
4409         case PACKET3_EVENT_WRITE_EOS:
4410         case PACKET3_SET_CONTEXT_REG:
4411         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4412         case PACKET3_SET_SH_REG:
4413         case PACKET3_SET_SH_REG_OFFSET:
4414         case PACKET3_INCREMENT_DE_COUNTER:
4415         case PACKET3_WAIT_ON_CE_COUNTER:
4416         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4417         case PACKET3_ME_WRITE:
4418                 break;
4419         case PACKET3_COPY_DATA:
4420                 if ((idx_value & 0xf00) == 0) {
4421                         reg = ib[idx + 3] * 4;
4422                         if (!si_vm_reg_valid(reg))
4423                                 return -EINVAL;
4424                 }
4425                 break;
4426         case PACKET3_WRITE_DATA:
4427                 if ((idx_value & 0xf00) == 0) {
4428                         start_reg = ib[idx + 1] * 4;
4429                         if (idx_value & 0x10000) {
4430                                 if (!si_vm_reg_valid(start_reg))
4431                                         return -EINVAL;
4432                         } else {
4433                                 for (i = 0; i < (pkt->count - 2); i++) {
4434                                         reg = start_reg + (4 * i);
4435                                         if (!si_vm_reg_valid(reg))
4436                                                 return -EINVAL;
4437                                 }
4438                         }
4439                 }
4440                 break;
4441         case PACKET3_COND_WRITE:
4442                 if (idx_value & 0x100) {
4443                         reg = ib[idx + 5] * 4;
4444                         if (!si_vm_reg_valid(reg))
4445                                 return -EINVAL;
4446                 }
4447                 break;
4448         case PACKET3_COPY_DW:
4449                 if (idx_value & 0x2) {
4450                         reg = ib[idx + 3] * 4;
4451                         if (!si_vm_reg_valid(reg))
4452                                 return -EINVAL;
4453                 }
4454                 break;
4455         case PACKET3_CP_DMA:
4456                 r = si_vm_packet3_cp_dma_check(ib, idx);
4457                 if (r)
4458                         return r;
4459                 break;
4460         default:
4461                 DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4462                 return -EINVAL;
4463         }
4464         return 0;
4465 }
4466
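/**
 * si_ib_parse - validate an indirect buffer
 *
 * @rdev: radeon_device pointer
 * @ib: radeon_ib structure holding the indirect buffer
 *
 * Walk the packets in an indirect buffer and validate them with
 * the CE, gfx or compute packet checker depending on the ring the
 * IB targets (SI).
 * Returns 0 if the IB is valid, error if not.
 */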
4467 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4468 {
4469         int ret = 0;
4470         u32 idx = 0;
4471         struct radeon_cs_packet pkt;
4472
4473         do {
4474                 pkt.idx = idx;
4475                 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4476                 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4477                 pkt.one_reg_wr = 0;
4478                 switch (pkt.type) {
4479                 case RADEON_PACKET_TYPE0:
4480                         dev_err(rdev->dev, "Packet0 not allowed!\n");
4481                         ret = -EINVAL;
4482                         break;
4483                 case RADEON_PACKET_TYPE2:
4484                         idx += 1;
4485                         break;
4486                 case RADEON_PACKET_TYPE3:
4487                         pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4488                         if (ib->is_const_ib)
4489                                 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4490                         else {
4491                                 switch (ib->ring) {
4492                                 case RADEON_RING_TYPE_GFX_INDEX:
4493                                         ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4494                                         break;
4495                                 case CAYMAN_RING_TYPE_CP1_INDEX:
4496                                 case CAYMAN_RING_TYPE_CP2_INDEX:
4497                                         ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4498                                         break;
4499                                 default:
4500                                         dev_err(rdev->dev, "Non-PM4 ring %d!\n", ib->ring);
4501                                         ret = -EINVAL;
4502                                         break;
4503                                 }
4504                         }
4505                         idx += pkt.count + 2;
4506                         break;
4507                 default:
4508                         dev_err(rdev->dev, "Unknown packet type %d!\n", pkt.type);
4509                         ret = -EINVAL;
4510                         break;
4511                 }
4512                 if (ret)
4513                         break;
4514         } while (idx < ib->length_dw);
4515
4516         return ret;
4517 }
4518
4519 /*
4520  * vm
4521  */
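/**
 * si_vm_init - initialize the VM manager
 *
 * @rdev: radeon_device pointer
 *
 * Set the number of VMs (16) and the VRAM base offset used by
 * the VM manager (SI).
 * Returns 0 for success.
 */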
4522 int si_vm_init(struct radeon_device *rdev)
4523 {
4524         /* number of VMs */
4525         rdev->vm_manager.nvm = 16;
4526         /* base offset of vram pages */
4527         rdev->vm_manager.vram_base_offset = 0;
4528
4529         return 0;
4530 }
4531
4532 void si_vm_fini(struct radeon_device *rdev)
4533 {
4534 }
4535
4536 /**
4537  * si_vm_decode_fault - print human-readable fault info
4538  *
4539  * @rdev: radeon_device pointer
4540  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4541  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4542  *
4543  * Print human-readable fault information (SI).
4544  */
4545 static void si_vm_decode_fault(struct radeon_device *rdev,
4546                                u32 status, u32 addr)
4547 {
4548         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4549         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4550         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4551         char *block;
4552
4553         if (rdev->family == CHIP_TAHITI) {
4554                 switch (mc_id) {
4555                 case 160:
4556                 case 144:
4557                 case 96:
4558                 case 80:
4559                 case 224:
4560                 case 208:
4561                 case 32:
4562                 case 16:
4563                         block = "CB";
4564                         break;
4565                 case 161:
4566                 case 145:
4567                 case 97:
4568                 case 81:
4569                 case 225:
4570                 case 209:
4571                 case 33:
4572                 case 17:
4573                         block = "CB_FMASK";
4574                         break;
4575                 case 162:
4576                 case 146:
4577                 case 98:
4578                 case 82:
4579                 case 226:
4580                 case 210:
4581                 case 34:
4582                 case 18:
4583                         block = "CB_CMASK";
4584                         break;
4585                 case 163:
4586                 case 147:
4587                 case 99:
4588                 case 83:
4589                 case 227:
4590                 case 211:
4591                 case 35:
4592                 case 19:
4593                         block = "CB_IMMED";
4594                         break;
4595                 case 164:
4596                 case 148:
4597                 case 100:
4598                 case 84:
4599                 case 228:
4600                 case 212:
4601                 case 36:
4602                 case 20:
4603                         block = "DB";
4604                         break;
4605                 case 165:
4606                 case 149:
4607                 case 101:
4608                 case 85:
4609                 case 229:
4610                 case 213:
4611                 case 37:
4612                 case 21:
4613                         block = "DB_HTILE";
4614                         break;
4615                 case 167:
4616                 case 151:
4617                 case 103:
4618                 case 87:
4619                 case 231:
4620                 case 215:
4621                 case 39:
4622                 case 23:
4623                         block = "DB_STEN";
4624                         break;
4625                 case 72:
4626                 case 68:
4627                 case 64:
4628                 case 8:
4629                 case 4:
4630                 case 0:
4631                 case 136:
4632                 case 132:
4633                 case 128:
4634                 case 200:
4635                 case 196:
4636                 case 192:
4637                         block = "TC";
4638                         break;
4639                 case 112:
4640                 case 48:
4641                         block = "CP";
4642                         break;
4643                 case 49:
4644                 case 177:
4645                 case 50:
4646                 case 178:
4647                         block = "SH";
4648                         break;
4649                 case 53:
4650                 case 190:
4651                         block = "VGT";
4652                         break;
4653                 case 117:
4654                         block = "IH";
4655                         break;
4656                 case 51:
4657                 case 115:
4658                         block = "RLC";
4659                         break;
4660                 case 119:
4661                 case 183:
4662                         block = "DMA0";
4663                         break;
4664                 case 61:
4665                         block = "DMA1";
4666                         break;
4667                 case 248:
4668                 case 120:
4669                         block = "HDP";
4670                         break;
4671                 default:
4672                         block = "unknown";
4673                         break;
4674                 }
4675         } else {
4676                 switch (mc_id) {
4677                 case 32:
4678                 case 16:
4679                 case 96:
4680                 case 80:
4681                 case 160:
4682                 case 144:
4683                 case 224:
4684                 case 208:
4685                         block = "CB";
4686                         break;
4687                 case 33:
4688                 case 17:
4689                 case 97:
4690                 case 81:
4691                 case 161:
4692                 case 145:
4693                 case 225:
4694                 case 209:
4695                         block = "CB_FMASK";
4696                         break;
4697                 case 34:
4698                 case 18:
4699                 case 98:
4700                 case 82:
4701                 case 162:
4702                 case 146:
4703                 case 226:
4704                 case 210:
4705                         block = "CB_CMASK";
4706                         break;
4707                 case 35:
4708                 case 19:
4709                 case 99:
4710                 case 83:
4711                 case 163:
4712                 case 147:
4713                 case 227:
4714                 case 211:
4715                         block = "CB_IMMED";
4716                         break;
4717                 case 36:
4718                 case 20:
4719                 case 100:
4720                 case 84:
4721                 case 164:
4722                 case 148:
4723                 case 228:
4724                 case 212:
4725                         block = "DB";
4726                         break;
4727                 case 37:
4728                 case 21:
4729                 case 101:
4730                 case 85:
4731                 case 165:
4732                 case 149:
4733                 case 229:
4734                 case 213:
4735                         block = "DB_HTILE";
4736                         break;
4737                 case 39:
4738                 case 23:
4739                 case 103:
4740                 case 87:
4741                 case 167:
4742                 case 151:
4743                 case 231:
4744                 case 215:
4745                         block = "DB_STEN";
4746                         break;
4747                 case 72:
4748                 case 68:
4749                 case 8:
4750                 case 4:
4751                 case 136:
4752                 case 132:
4753                 case 200:
4754                 case 196:
4755                         block = "TC";
4756                         break;
4757                 case 112:
4758                 case 48:
4759                         block = "CP";
4760                         break;
4761                 case 49:
4762                 case 177:
4763                 case 50:
4764                 case 178:
4765                         block = "SH";
4766                         break;
4767                 case 53:
4768                         block = "VGT";
4769                         break;
4770                 case 117:
4771                         block = "IH";
4772                         break;
4773                 case 51:
4774                 case 115:
4775                         block = "RLC";
4776                         break;
4777                 case 119:
4778                 case 183:
4779                         block = "DMA0";
4780                         break;
4781                 case 61:
4782                         block = "DMA1";
4783                         break;
4784                 case 248:
4785                 case 120:
4786                         block = "HDP";
4787                         break;
4788                 default:
4789                         block = "unknown";
4790                         break;
4791                 }
4792         }
4793
4794         printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4795                protections, vmid, addr,
4796                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4797                block, mc_id);
4798 }
4799
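/**
 * si_vm_flush - flush the TLB for a VM
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the ring to emit the flush on
 * @vm: radeon_vm pointer
 *
 * Update the page table base address for the requested VM, flush
 * the HDP cache and invalidate the VM's TLB entries via the ring,
 * then sync the PFP to the ME (SI).
 */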
4800 void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4801 {
4802         struct radeon_ring *ring = &rdev->ring[ridx];
4803
4804         if (vm == NULL)
4805                 return;
4806
4807         /* write new base address */
4808         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4809         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4810                                  WRITE_DATA_DST_SEL(0)));
4811
4812         if (vm->id < 8) {
4813                 radeon_ring_write(ring,
4814                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4815         } else {
4816                 radeon_ring_write(ring,
4817                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4818         }
4819         radeon_ring_write(ring, 0);
4820         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4821
4822         /* flush hdp cache */
4823         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4824         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4825                                  WRITE_DATA_DST_SEL(0)));
4826         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4827         radeon_ring_write(ring, 0);
4828         radeon_ring_write(ring, 0x1);
4829
4830         /* bits 0-15 are the VM contexts 0-15 */
4831         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4832         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4833                                  WRITE_DATA_DST_SEL(0)));
4834         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4835         radeon_ring_write(ring, 0);
4836         radeon_ring_write(ring, 1 << vm->id);
4837
4838         /* sync PFP to ME, otherwise we might get invalid PFP reads */
4839         radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4840         radeon_ring_write(ring, 0x0);
4841 }
4842
4843 /*
4844  *  Power and clock gating
4845  */
4846 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
4847 {
4848         int i;
4849
4850         for (i = 0; i < rdev->usec_timeout; i++) {
4851                 if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
4852                         break;
4853                 udelay(1);
4854         }
4855
4856         for (i = 0; i < rdev->usec_timeout; i++) {
4857                 if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
4858                         break;
4859                 udelay(1);
4860         }
4861 }
4862
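/**
 * si_enable_gui_idle_interrupt - enable/disable the gui idle interrupts
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the interrupts
 *
 * Enable or disable the CP context busy/empty (gui idle) interrupts;
 * when disabling, wait for the RLC to report the expected gfx clock
 * and power status (SI).
 */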
4863 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
4864                                          bool enable)
4865 {
4866         u32 tmp = RREG32(CP_INT_CNTL_RING0);
4867         u32 mask;
4868         int i;
4869
4870         if (enable)
4871                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4872         else
4873                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4874         WREG32(CP_INT_CNTL_RING0, tmp);
4875
4876         if (!enable) {
4877                 /* read a gfx register */
4878                 tmp = RREG32(DB_DEPTH_INFO);
4879
4880                 mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
4881                 for (i = 0; i < rdev->usec_timeout; i++) {
4882                         if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
4883                                 break;
4884                         udelay(1);
4885                 }
4886         }
4887 }
4888
4889 static void si_set_uvd_dcm(struct radeon_device *rdev,
4890                            bool sw_mode)
4891 {
4892         u32 tmp, tmp2;
4893
4894         tmp = RREG32(UVD_CGC_CTRL);
4895         tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
4896         tmp |= DCM | CG_DT(1) | CLK_OD(4);
4897
4898         if (sw_mode) {
4899                 tmp &= ~0x7ffff800;
4900                 tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
4901         } else {
4902                 tmp |= 0x7ffff800;
4903                 tmp2 = 0;
4904         }
4905
4906         WREG32(UVD_CGC_CTRL, tmp);
4907         WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
4908 }
4909
4910 void si_init_uvd_internal_cg(struct radeon_device *rdev)
4911 {
4912         bool hw_mode = true;
4913
4914         if (hw_mode) {
4915                 si_set_uvd_dcm(rdev, false);
4916         } else {
4917                 u32 tmp = RREG32(UVD_CGC_CTRL);
4918                 tmp &= ~DCM;
4919                 WREG32(UVD_CGC_CTRL, tmp);
4920         }
4921 }
4922
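/**
 * si_halt_rlc - halt the RLC
 *
 * @rdev: radeon_device pointer
 *
 * Disable the RLC if it is enabled and wait for the RLC serdes to
 * become idle.
 * Returns the original RLC_CNTL value so that it can be restored
 * later with si_update_rlc() (SI).
 */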
4923 static u32 si_halt_rlc(struct radeon_device *rdev)
4924 {
4925         u32 data, orig;
4926
4927         orig = data = RREG32(RLC_CNTL);
4928
4929         if (data & RLC_ENABLE) {
4930                 data &= ~RLC_ENABLE;
4931                 WREG32(RLC_CNTL, data);
4932
4933                 si_wait_for_rlc_serdes(rdev);
4934         }
4935
4936         return orig;
4937 }
4938
4939 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
4940 {
4941         u32 tmp;
4942
4943         tmp = RREG32(RLC_CNTL);
4944         if (tmp != rlc)
4945                 WREG32(RLC_CNTL, rlc);
4946 }
4947
4948 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
4949 {
4950         u32 data, orig;
4951
4952         orig = data = RREG32(DMA_PG);
4953         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
4954                 data |= PG_CNTL_ENABLE;
4955         else
4956                 data &= ~PG_CNTL_ENABLE;
4957         if (orig != data)
4958                 WREG32(DMA_PG, data);
4959 }
4960
4961 static void si_init_dma_pg(struct radeon_device *rdev)
4962 {
4963         u32 tmp;
4964
4965         WREG32(DMA_PGFSM_WRITE,  0x00002000);
4966         WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
4967
4968         for (tmp = 0; tmp < 5; tmp++)
4969                 WREG32(DMA_PGFSM_WRITE, 0);
4970 }
4971
4972 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
4973                                bool enable)
4974 {
4975         u32 tmp;
4976
4977         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
4978                 tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
4979                 WREG32(RLC_TTOP_D, tmp);
4980
4981                 tmp = RREG32(RLC_PG_CNTL);
4982                 tmp |= GFX_PG_ENABLE;
4983                 WREG32(RLC_PG_CNTL, tmp);
4984
4985                 tmp = RREG32(RLC_AUTO_PG_CTRL);
4986                 tmp |= AUTO_PG_EN;
4987                 WREG32(RLC_AUTO_PG_CTRL, tmp);
4988         } else {
4989                 tmp = RREG32(RLC_AUTO_PG_CTRL);
4990                 tmp &= ~AUTO_PG_EN;
4991                 WREG32(RLC_AUTO_PG_CTRL, tmp);
4992
4993                 tmp = RREG32(DB_RENDER_CONTROL);
4994         }
4995 }
4996
4997 static void si_init_gfx_cgpg(struct radeon_device *rdev)
4998 {
4999         u32 tmp;
5000
5001         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5002
5003         tmp = RREG32(RLC_PG_CNTL);
5004         tmp |= GFX_PG_SRC;
5005         WREG32(RLC_PG_CNTL, tmp);
5006
5007         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5008
5009         tmp = RREG32(RLC_AUTO_PG_CTRL);
5010
5011         tmp &= ~GRBM_REG_SGIT_MASK;
5012         tmp |= GRBM_REG_SGIT(0x700);
5013         tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5014         WREG32(RLC_AUTO_PG_CTRL, tmp);
5015 }
5016
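/**
 * si_get_cu_active_bitmap - get the bitmap of active CUs
 *
 * @rdev: radeon_device pointer
 * @se: shader engine to query
 * @sh: shader array to query
 *
 * Query the shader array configuration registers and return a
 * bitmap of the CUs that are active (not disabled) for the given
 * shader engine and shader array (SI).
 */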
5017 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5018 {
5019         u32 mask = 0, tmp, tmp1;
5020         int i;
5021
5022         si_select_se_sh(rdev, se, sh);
5023         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5024         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5025         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5026
5027         tmp &= 0xffff0000;
5028
5029         tmp |= tmp1;
5030         tmp >>= 16;
5031
5032         for (i = 0; i < rdev->config.si.max_cu_per_sh; i++) {
5033                 mask <<= 1;
5034                 mask |= 1;
5035         }
5036
5037         return (~tmp) & mask;
5038 }
5039
5040 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5041 {
5042         u32 i, j, k, active_cu_number = 0;
5043         u32 mask, counter, cu_bitmap;
5044         u32 tmp = 0;
5045
5046         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5047                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5048                         mask = 1;
5049                         cu_bitmap = 0;
5050                         counter  = 0;
5051                         for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5052                                 if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5053                                         if (counter < 2)
5054                                                 cu_bitmap |= mask;
5055                                         counter++;
5056                                 }
5057                                 mask <<= 1;
5058                         }
5059
5060                         active_cu_number += counter;
5061                         tmp |= (cu_bitmap << (i * 16 + j * 8));
5062                 }
5063         }
5064
5065         WREG32(RLC_PG_AO_CU_MASK, tmp);
5066
5067         tmp = RREG32(RLC_MAX_PG_CU);
5068         tmp &= ~MAX_PU_CU_MASK;
5069         tmp |= MAX_PU_CU(active_cu_number);
5070         WREG32(RLC_MAX_PG_CU, tmp);
5071 }
5072
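/*
 * si_enable_cgcg - toggle coarse grain clock gating (CGCG/CGLS) for GFX.
 * On enable the RLC is halted and a serdes write is broadcast to all
 * SEs/SHs before CGCG_EN/CGLS_EN are set in RLC_CGCG_CGLS_CTRL; on disable
 * the two bits are simply cleared.
 */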
5073 static void si_enable_cgcg(struct radeon_device *rdev,
5074                            bool enable)
5075 {
5076         u32 data, orig, tmp;
5077
5078         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5079
5080         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5081                 si_enable_gui_idle_interrupt(rdev, true);
5082
5083                 WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5084
5085                 tmp = si_halt_rlc(rdev);
5086
5087                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5088                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5089                 WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5090
5091                 si_wait_for_rlc_serdes(rdev);
5092
5093                 si_update_rlc(rdev, tmp);
5094
5095                 WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5096
5097                 data |= CGCG_EN | CGLS_EN;
5098         } else {
5099                 si_enable_gui_idle_interrupt(rdev, false);
5100
5101                 RREG32(CB_CGTT_SCLK_CTRL);
5102                 RREG32(CB_CGTT_SCLK_CTRL);
5103                 RREG32(CB_CGTT_SCLK_CTRL);
5104                 RREG32(CB_CGTT_SCLK_CTRL);
5105
5106                 data &= ~(CGCG_EN | CGLS_EN);
5107         }
5108
5109         if (orig != data)
5110                 WREG32(RLC_CGCG_CGLS_CTRL, data);
5111 }
5112
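/*
 * si_enable_mgcg - toggle medium grain clock gating for GFX.  Enabling
 * programs CGTS_SM_CTRL_REG, optionally turns on CP memory light sleep,
 * clears the low RLC_CGTT_MGCG_OVERRIDE bits and pushes the change out over
 * the RLC serdes; disabling forces the overrides back on and reverses the
 * rest.
 */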
5113 static void si_enable_mgcg(struct radeon_device *rdev,
5114                            bool enable)
5115 {
5116         u32 data, orig, tmp = 0;
5117
5118         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5119                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5120                 data = 0x96940200;
5121                 if (orig != data)
5122                         WREG32(CGTS_SM_CTRL_REG, data);
5123
5124                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5125                         orig = data = RREG32(CP_MEM_SLP_CNTL);
5126                         data |= CP_MEM_LS_EN;
5127                         if (orig != data)
5128                                 WREG32(CP_MEM_SLP_CNTL, data);
5129                 }
5130
5131                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5132                 data &= 0xffffffc0;
5133                 if (orig != data)
5134                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5135
5136                 tmp = si_halt_rlc(rdev);
5137
5138                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5139                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5140                 WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5141
5142                 si_update_rlc(rdev, tmp);
5143         } else {
5144                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5145                 data |= 0x00000003;
5146                 if (orig != data)
5147                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5148
5149                 data = RREG32(CP_MEM_SLP_CNTL);
5150                 if (data & CP_MEM_LS_EN) {
5151                         data &= ~CP_MEM_LS_EN;
5152                         WREG32(CP_MEM_SLP_CNTL, data);
5153                 }
5154                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5155                 data |= LS_OVERRIDE | OVERRIDE;
5156                 if (orig != data)
5157                         WREG32(CGTS_SM_CTRL_REG, data);
5158
5159                 tmp = si_halt_rlc(rdev);
5160
5161                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5162                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5163                 WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5164
5165                 si_update_rlc(rdev, tmp);
5166         }
5167 }
5168
5169 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5170                                bool enable)
5171 {
5172         u32 orig, data, tmp;
5173
5174         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5175                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5176                 tmp |= 0x3fff;
5177                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5178
5179                 orig = data = RREG32(UVD_CGC_CTRL);
5180                 data |= DCM;
5181                 if (orig != data)
5182                         WREG32(UVD_CGC_CTRL, data);
5183
5184                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5185                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5186         } else {
5187                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5188                 tmp &= ~0x3fff;
5189                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5190
5191                 orig = data = RREG32(UVD_CGC_CTRL);
5192                 data &= ~DCM;
5193                 if (orig != data)
5194                         WREG32(UVD_CGC_CTRL, data);
5195
5196                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5197                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5198         }
5199 }
5200
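/*
 * MC/ATC/VM registers that share the MC_LS_ENABLE and MC_CG_ENABLE bits;
 * si_enable_mc_ls() and si_enable_mc_mgcg() below walk this list to toggle
 * light sleep and medium grain clock gating.
 */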
5201 static const u32 mc_cg_registers[] =
5202 {
5203         MC_HUB_MISC_HUB_CG,
5204         MC_HUB_MISC_SIP_CG,
5205         MC_HUB_MISC_VM_CG,
5206         MC_XPB_CLK_GAT,
5207         ATC_MISC_CG,
5208         MC_CITF_MISC_WR_CG,
5209         MC_CITF_MISC_RD_CG,
5210         MC_CITF_MISC_VM_CG,
5211         VM_L2_CG,
5212 };
5213
5214 static void si_enable_mc_ls(struct radeon_device *rdev,
5215                             bool enable)
5216 {
5217         int i;
5218         u32 orig, data;
5219
5220         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5221                 orig = data = RREG32(mc_cg_registers[i]);
5222                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5223                         data |= MC_LS_ENABLE;
5224                 else
5225                         data &= ~MC_LS_ENABLE;
5226                 if (data != orig)
5227                         WREG32(mc_cg_registers[i], data);
5228         }
5229 }
5230
5231 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5232                                bool enable)
5233 {
5234         int i;
5235         u32 orig, data;
5236
5237         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5238                 orig = data = RREG32(mc_cg_registers[i]);
5239                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5240                         data |= MC_CG_ENABLE;
5241                 else
5242                         data &= ~MC_CG_ENABLE;
5243                 if (data != orig)
5244                         WREG32(mc_cg_registers[i], data);
5245         }
5246 }
5247
5248 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5249                                bool enable)
5250 {
5251         u32 orig, data, offset;
5252         int i;
5253
5254         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5255                 for (i = 0; i < 2; i++) {
5256                         if (i == 0)
5257                                 offset = DMA0_REGISTER_OFFSET;
5258                         else
5259                                 offset = DMA1_REGISTER_OFFSET;
5260                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5261                         data &= ~MEM_POWER_OVERRIDE;
5262                         if (data != orig)
5263                                 WREG32(DMA_POWER_CNTL + offset, data);
5264                         WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5265                 }
5266         } else {
5267                 for (i = 0; i < 2; i++) {
5268                         if (i == 0)
5269                                 offset = DMA0_REGISTER_OFFSET;
5270                         else
5271                                 offset = DMA1_REGISTER_OFFSET;
5272                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5273                         data |= MEM_POWER_OVERRIDE;
5274                         if (data != orig)
5275                                 WREG32(DMA_POWER_CNTL + offset, data);
5276
5277                         orig = data = RREG32(DMA_CLK_CTRL + offset);
5278                         data = 0xff000000;
5279                         if (data != orig)
5280                                 WREG32(DMA_CLK_CTRL + offset, data);
5281                 }
5282         }
5283 }
5284
5285 static void si_enable_bif_mgls(struct radeon_device *rdev,
5286                                bool enable)
5287 {
5288         u32 orig, data;
5289
5290         orig = data = RREG32_PCIE(PCIE_CNTL2);
5291
5292         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5293                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5294                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5295         else
5296                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5297                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5298
5299         if (orig != data)
5300                 WREG32_PCIE(PCIE_CNTL2, data);
5301 }
5302
5303 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5304                                bool enable)
5305 {
5306         u32 orig, data;
5307
5308         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5309
5310         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5311                 data &= ~CLOCK_GATING_DIS;
5312         else
5313                 data |= CLOCK_GATING_DIS;
5314
5315         if (orig != data)
5316                 WREG32(HDP_HOST_PATH_CNTL, data);
5317 }
5318
5319 static void si_enable_hdp_ls(struct radeon_device *rdev,
5320                              bool enable)
5321 {
5322         u32 orig, data;
5323
5324         orig = data = RREG32(HDP_MEM_POWER_LS);
5325
5326         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5327                 data |= HDP_LS_ENABLE;
5328         else
5329                 data &= ~HDP_LS_ENABLE;
5330
5331         if (orig != data)
5332                 WREG32(HDP_MEM_POWER_LS, data);
5333 }
5334
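/*
 * si_update_cg - enable or disable clock gating for the selected blocks.
 * For GFX the GUI idle interrupt is masked while the state changes and the
 * ordering matters: MGCG before CGCG on enable, CGCG before MGCG on disable
 * (hence the "order matters!" note below).
 */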
5335 static void si_update_cg(struct radeon_device *rdev,
5336                          u32 block, bool enable)
5337 {
5338         if (block & RADEON_CG_BLOCK_GFX) {
5339                 si_enable_gui_idle_interrupt(rdev, false);
5340                 /* order matters! */
5341                 if (enable) {
5342                         si_enable_mgcg(rdev, true);
5343                         si_enable_cgcg(rdev, true);
5344                 } else {
5345                         si_enable_cgcg(rdev, false);
5346                         si_enable_mgcg(rdev, false);
5347                 }
5348                 si_enable_gui_idle_interrupt(rdev, true);
5349         }
5350
5351         if (block & RADEON_CG_BLOCK_MC) {
5352                 si_enable_mc_mgcg(rdev, enable);
5353                 si_enable_mc_ls(rdev, enable);
5354         }
5355
5356         if (block & RADEON_CG_BLOCK_SDMA) {
5357                 si_enable_dma_mgcg(rdev, enable);
5358         }
5359
5360         if (block & RADEON_CG_BLOCK_BIF) {
5361                 si_enable_bif_mgls(rdev, enable);
5362         }
5363
5364         if (block & RADEON_CG_BLOCK_UVD) {
5365                 if (rdev->has_uvd) {
5366                         si_enable_uvd_mgcg(rdev, enable);
5367                 }
5368         }
5369
5370         if (block & RADEON_CG_BLOCK_HDP) {
5371                 si_enable_hdp_mgcg(rdev, enable);
5372                 si_enable_hdp_ls(rdev, enable);
5373         }
5374 }
5375
5376 static void si_init_cg(struct radeon_device *rdev)
5377 {
5378         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5379                             RADEON_CG_BLOCK_MC |
5380                             RADEON_CG_BLOCK_SDMA |
5381                             RADEON_CG_BLOCK_BIF |
5382                             RADEON_CG_BLOCK_HDP), true);
5383         if (rdev->has_uvd) {
5384                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5385                 si_init_uvd_internal_cg(rdev);
5386         }
5387 }
5388
5389 static void si_fini_cg(struct radeon_device *rdev)
5390 {
5391         if (rdev->has_uvd) {
5392                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5393         }
5394         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5395                             RADEON_CG_BLOCK_MC |
5396                             RADEON_CG_BLOCK_SDMA |
5397                             RADEON_CG_BLOCK_BIF |
5398                             RADEON_CG_BLOCK_HDP), false);
5399 }
5400
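/*
 * Clear state buffer (CSB) handling.  si_get_csb_size() returns the length
 * in dwords of the PM4 stream that si_get_csb_buffer() emits:
 * PREAMBLE(BEGIN_CLEAR_STATE), CONTEXT_CONTROL, one SET_CONTEXT_REG packet
 * per extent in rdev->rlc.cs_data, SET_CONTEXT_REG(PA_SC_RASTER_CONFIG),
 * PREAMBLE(END_CLEAR_STATE) and CLEAR_STATE.  The two functions must stay
 * in sync.
 */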
5401 u32 si_get_csb_size(struct radeon_device *rdev)
5402 {
5403         u32 count = 0;
5404         const struct cs_section_def *sect = NULL;
5405         const struct cs_extent_def *ext = NULL;
5406
5407         if (rdev->rlc.cs_data == NULL)
5408                 return 0;
5409
5410         /* begin clear state */
5411         count += 2;
5412         /* context control state */
5413         count += 3;
5414
5415         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5416                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5417                         if (sect->id == SECT_CONTEXT)
5418                                 count += 2 + ext->reg_count;
5419                         else
5420                                 return 0;
5421                 }
5422         }
5423         /* pa_sc_raster_config */
5424         count += 3;
5425         /* end clear state */
5426         count += 2;
5427         /* clear state */
5428         count += 2;
5429
5430         return count;
5431 }
5432
5433 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5434 {
5435         u32 count = 0, i;
5436         const struct cs_section_def *sect = NULL;
5437         const struct cs_extent_def *ext = NULL;
5438
5439         if (rdev->rlc.cs_data == NULL)
5440                 return;
5441         if (buffer == NULL)
5442                 return;
5443
5444         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5445         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5446
5447         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5448         buffer[count++] = cpu_to_le32(0x80000000);
5449         buffer[count++] = cpu_to_le32(0x80000000);
5450
5451         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5452                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5453                         if (sect->id == SECT_CONTEXT) {
5454                                 buffer[count++] =
5455                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5456                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5457                                 for (i = 0; i < ext->reg_count; i++)
5458                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
5459                         } else {
5460                                 return;
5461                         }
5462                 }
5463         }
5464
5465         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5466         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5467         switch (rdev->family) {
5468         case CHIP_TAHITI:
5469         case CHIP_PITCAIRN:
5470                 buffer[count++] = cpu_to_le32(0x2a00126a);
5471                 break;
5472         case CHIP_VERDE:
5473                 buffer[count++] = cpu_to_le32(0x0000124a);
5474                 break;
5475         case CHIP_OLAND:
5476                 buffer[count++] = cpu_to_le32(0x00000082);
5477                 break;
5478         case CHIP_HAINAN:
5479                 buffer[count++] = cpu_to_le32(0x00000000);
5480                 break;
5481         default:
5482                 buffer[count++] = cpu_to_le32(0x00000000);
5483                 break;
5484         }
5485
5486         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5487         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5488
5489         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5490         buffer[count++] = cpu_to_le32(0);
5491 }
5492
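/*
 * si_init_pg - set up power gating.  The RLC save/restore and clear state
 * buffer addresses are always programmed (either here or by
 * si_init_gfx_cgpg()); DMA and GFX power gating are then enabled according
 * to the individual pg_flags bits.
 */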
5493 static void si_init_pg(struct radeon_device *rdev)
5494 {
5495         if (rdev->pg_flags) {
5496                 if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5497                         si_init_dma_pg(rdev);
5498                 }
5499                 si_init_ao_cu_mask(rdev);
5500                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5501                         si_init_gfx_cgpg(rdev);
5502                 } else {
5503                         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5504                         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5505                 }
5506                 si_enable_dma_pg(rdev, true);
5507                 si_enable_gfx_cgpg(rdev, true);
5508         } else {
5509                 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5510                 WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5511         }
5512 }
5513
5514 static void si_fini_pg(struct radeon_device *rdev)
5515 {
5516         if (rdev->pg_flags) {
5517                 si_enable_dma_pg(rdev, false);
5518                 si_enable_gfx_cgpg(rdev, false);
5519         }
5520 }
5521
5522 /*
5523  * RLC
5524  */
5525 void si_rlc_reset(struct radeon_device *rdev)
5526 {
5527         u32 tmp = RREG32(GRBM_SOFT_RESET);
5528
5529         tmp |= SOFT_RESET_RLC;
5530         WREG32(GRBM_SOFT_RESET, tmp);
5531         udelay(50);
5532         tmp &= ~SOFT_RESET_RLC;
5533         WREG32(GRBM_SOFT_RESET, tmp);
5534         udelay(50);
5535 }
5536
5537 static void si_rlc_stop(struct radeon_device *rdev)
5538 {
5539         WREG32(RLC_CNTL, 0);
5540
5541         si_enable_gui_idle_interrupt(rdev, false);
5542
5543         si_wait_for_rlc_serdes(rdev);
5544 }
5545
5546 static void si_rlc_start(struct radeon_device *rdev)
5547 {
5548         WREG32(RLC_CNTL, RLC_ENABLE);
5549
5550         si_enable_gui_idle_interrupt(rdev, true);
5551
5552         udelay(50);
5553 }
5554
5555 static bool si_lbpw_supported(struct radeon_device *rdev)
5556 {
5557         u32 tmp;
5558
5559         /* Enable LBPW only for DDR3 */
5560         tmp = RREG32(MC_SEQ_MISC0);
5561         if ((tmp & 0xF0000000) == 0xB0000000)
5562                 return true;
5563         return false;
5564 }
5565
5566 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5567 {
5568         u32 tmp;
5569
5570         tmp = RREG32(RLC_LB_CNTL);
5571         if (enable)
5572                 tmp |= LOAD_BALANCE_ENABLE;
5573         else
5574                 tmp &= ~LOAD_BALANCE_ENABLE;
5575         WREG32(RLC_LB_CNTL, tmp);
5576
5577         if (!enable) {
5578                 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5579                 WREG32(SPI_LB_CU_MASK, 0x00ff);
5580         }
5581 }
5582
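/*
 * si_rlc_resume - bring the RLC back up: stop and reset it, reinitialize
 * power and clock gating, clear the list and load-balancing registers,
 * upload the RLC microcode one dword at a time through RLC_UCODE_ADDR/DATA
 * and restart it.
 */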
5583 static int si_rlc_resume(struct radeon_device *rdev)
5584 {
5585         u32 i;
5586         const __be32 *fw_data;
5587
5588         if (!rdev->rlc_fw)
5589                 return -EINVAL;
5590
5591         si_rlc_stop(rdev);
5592
5593         si_rlc_reset(rdev);
5594
5595         si_init_pg(rdev);
5596
5597         si_init_cg(rdev);
5598
5599         WREG32(RLC_RL_BASE, 0);
5600         WREG32(RLC_RL_SIZE, 0);
5601         WREG32(RLC_LB_CNTL, 0);
5602         WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5603         WREG32(RLC_LB_CNTR_INIT, 0);
5604         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5605
5606         WREG32(RLC_MC_CNTL, 0);
5607         WREG32(RLC_UCODE_CNTL, 0);
5608
5609         fw_data = (const __be32 *)rdev->rlc_fw->data;
5610         for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5611                 WREG32(RLC_UCODE_ADDR, i);
5612                 WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5613         }
5614         WREG32(RLC_UCODE_ADDR, 0);
5615
5616         si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5617
5618         si_rlc_start(rdev);
5619
5620         return 0;
5621 }
5622
5623 static void si_enable_interrupts(struct radeon_device *rdev)
5624 {
5625         u32 ih_cntl = RREG32(IH_CNTL);
5626         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5627
5628         ih_cntl |= ENABLE_INTR;
5629         ih_rb_cntl |= IH_RB_ENABLE;
5630         WREG32(IH_CNTL, ih_cntl);
5631         WREG32(IH_RB_CNTL, ih_rb_cntl);
5632         rdev->ih.enabled = true;
5633 }
5634
5635 static void si_disable_interrupts(struct radeon_device *rdev)
5636 {
5637         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5638         u32 ih_cntl = RREG32(IH_CNTL);
5639
5640         ih_rb_cntl &= ~IH_RB_ENABLE;
5641         ih_cntl &= ~ENABLE_INTR;
5642         WREG32(IH_RB_CNTL, ih_rb_cntl);
5643         WREG32(IH_CNTL, ih_cntl);
5644         /* set rptr, wptr to 0 */
5645         WREG32(IH_RB_RPTR, 0);
5646         WREG32(IH_RB_WPTR, 0);
5647         rdev->ih.enabled = false;
5648         rdev->ih.rptr = 0;
5649 }
5650
5651 static void si_disable_interrupt_state(struct radeon_device *rdev)
5652 {
5653         u32 tmp;
5654
5655         tmp = RREG32(CP_INT_CNTL_RING0) &
5656                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5657         WREG32(CP_INT_CNTL_RING0, tmp);
5658         WREG32(CP_INT_CNTL_RING1, 0);
5659         WREG32(CP_INT_CNTL_RING2, 0);
5660         tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5661         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5662         tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5663         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5664         WREG32(GRBM_INT_CNTL, 0);
5665         if (rdev->num_crtc >= 2) {
5666                 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5667                 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5668         }
5669         if (rdev->num_crtc >= 4) {
5670                 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5671                 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5672         }
5673         if (rdev->num_crtc >= 6) {
5674                 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5675                 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5676         }
5677
5678         if (rdev->num_crtc >= 2) {
5679                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5680                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5681         }
5682         if (rdev->num_crtc >= 4) {
5683                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5684                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5685         }
5686         if (rdev->num_crtc >= 6) {
5687                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5688                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5689         }
5690
5691         if (!ASIC_IS_NODCE(rdev)) {
5692                 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5693
5694                 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5695                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5696                 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5697                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5698                 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5699                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5700                 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5701                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5702                 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5703                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5704                 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5705                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5706         }
5707 }
5708
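/*
 * si_irq_init - set up the interrupt controller: allocate the IH ring,
 * resume the RLC, program the ring base, size and writeback address, and
 * enable the controller with every source masked; si_irq_set() later
 * unmasks the sources that are actually in use.
 */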
5709 static int si_irq_init(struct radeon_device *rdev)
5710 {
5711         int ret = 0;
5712         int rb_bufsz;
5713         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5714
5715         /* allocate ring */
5716         ret = r600_ih_ring_alloc(rdev);
5717         if (ret)
5718                 return ret;
5719
5720         /* disable irqs */
5721         si_disable_interrupts(rdev);
5722
5723         /* init rlc */
5724         ret = si_rlc_resume(rdev);
5725         if (ret) {
5726                 r600_ih_ring_fini(rdev);
5727                 return ret;
5728         }
5729
5730         /* setup interrupt control */
5731         /* set dummy read address to ring address */
5732         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5733         interrupt_cntl = RREG32(INTERRUPT_CNTL);
5734         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5735          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5736          */
5737         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5738         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5739         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5740         WREG32(INTERRUPT_CNTL, interrupt_cntl);
5741
5742         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5743         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
5744
5745         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5746                       IH_WPTR_OVERFLOW_CLEAR |
5747                       (rb_bufsz << 1));
5748
5749         if (rdev->wb.enabled)
5750                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5751
5752         /* set the writeback address whether it's enabled or not */
5753         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5754         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5755
5756         WREG32(IH_RB_CNTL, ih_rb_cntl);
5757
5758         /* set rptr, wptr to 0 */
5759         WREG32(IH_RB_RPTR, 0);
5760         WREG32(IH_RB_WPTR, 0);
5761
5762         /* Default settings for IH_CNTL (disabled at first) */
5763         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5764         /* RPTR_REARM only works if msi's are enabled */
5765         if (rdev->msi_enabled)
5766                 ih_cntl |= RPTR_REARM;
5767         WREG32(IH_CNTL, ih_cntl);
5768
5769         /* force the active interrupt state to all disabled */
5770         si_disable_interrupt_state(rdev);
5771
5772         pci_set_master(rdev->pdev);
5773
5774         /* enable irqs */
5775         si_enable_interrupts(rdev);
5776
5777         return ret;
5778 }
5779
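/*
 * si_irq_set - build the enable masks for every interrupt source that is
 * currently requested (CP rings, DMA engines, vblank/page flip, HPD,
 * thermal) and write them to the hardware in one pass.
 */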
5780 int si_irq_set(struct radeon_device *rdev)
5781 {
5782         u32 cp_int_cntl;
5783         u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
5784         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5785         u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
5786         u32 grbm_int_cntl = 0;
5787         u32 dma_cntl, dma_cntl1;
5788         u32 thermal_int = 0;
5789
5790         if (!rdev->irq.installed) {
5791                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5792                 return -EINVAL;
5793         }
5794         /* don't enable anything if the ih is disabled */
5795         if (!rdev->ih.enabled) {
5796                 si_disable_interrupts(rdev);
5797                 /* force the active interrupt state to all disabled */
5798                 si_disable_interrupt_state(rdev);
5799                 return 0;
5800         }
5801
5802         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
5803                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5804
5805         if (!ASIC_IS_NODCE(rdev)) {
5806                 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5807                 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5808                 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5809                 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5810                 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5811                 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5812         }
5813
5814         dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5815         dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5816
5817         thermal_int = RREG32(CG_THERMAL_INT) &
5818                 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
5819
5820         /* enable CP interrupts on all rings */
5821         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5822                 DRM_DEBUG("si_irq_set: sw int gfx\n");
5823                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5824         }
5825         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5826                 DRM_DEBUG("si_irq_set: sw int cp1\n");
5827                 cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
5828         }
5829         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5830                 DRM_DEBUG("si_irq_set: sw int cp2\n");
5831                 cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
5832         }
5833         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5834                 DRM_DEBUG("si_irq_set: sw int dma\n");
5835                 dma_cntl |= TRAP_ENABLE;
5836         }
5837
5838         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5839                 DRM_DEBUG("si_irq_set: sw int dma1\n");
5840                 dma_cntl1 |= TRAP_ENABLE;
5841         }
5842         if (rdev->irq.crtc_vblank_int[0] ||
5843             atomic_read(&rdev->irq.pflip[0])) {
5844                 DRM_DEBUG("si_irq_set: vblank 0\n");
5845                 crtc1 |= VBLANK_INT_MASK;
5846         }
5847         if (rdev->irq.crtc_vblank_int[1] ||
5848             atomic_read(&rdev->irq.pflip[1])) {
5849                 DRM_DEBUG("si_irq_set: vblank 1\n");
5850                 crtc2 |= VBLANK_INT_MASK;
5851         }
5852         if (rdev->irq.crtc_vblank_int[2] ||
5853             atomic_read(&rdev->irq.pflip[2])) {
5854                 DRM_DEBUG("si_irq_set: vblank 2\n");
5855                 crtc3 |= VBLANK_INT_MASK;
5856         }
5857         if (rdev->irq.crtc_vblank_int[3] ||
5858             atomic_read(&rdev->irq.pflip[3])) {
5859                 DRM_DEBUG("si_irq_set: vblank 3\n");
5860                 crtc4 |= VBLANK_INT_MASK;
5861         }
5862         if (rdev->irq.crtc_vblank_int[4] ||
5863             atomic_read(&rdev->irq.pflip[4])) {
5864                 DRM_DEBUG("si_irq_set: vblank 4\n");
5865                 crtc5 |= VBLANK_INT_MASK;
5866         }
5867         if (rdev->irq.crtc_vblank_int[5] ||
5868             atomic_read(&rdev->irq.pflip[5])) {
5869                 DRM_DEBUG("si_irq_set: vblank 5\n");
5870                 crtc6 |= VBLANK_INT_MASK;
5871         }
5872         if (rdev->irq.hpd[0]) {
5873                 DRM_DEBUG("si_irq_set: hpd 1\n");
5874                 hpd1 |= DC_HPDx_INT_EN;
5875         }
5876         if (rdev->irq.hpd[1]) {
5877                 DRM_DEBUG("si_irq_set: hpd 2\n");
5878                 hpd2 |= DC_HPDx_INT_EN;
5879         }
5880         if (rdev->irq.hpd[2]) {
5881                 DRM_DEBUG("si_irq_set: hpd 3\n");
5882                 hpd3 |= DC_HPDx_INT_EN;
5883         }
5884         if (rdev->irq.hpd[3]) {
5885                 DRM_DEBUG("si_irq_set: hpd 4\n");
5886                 hpd4 |= DC_HPDx_INT_EN;
5887         }
5888         if (rdev->irq.hpd[4]) {
5889                 DRM_DEBUG("si_irq_set: hpd 5\n");
5890                 hpd5 |= DC_HPDx_INT_EN;
5891         }
5892         if (rdev->irq.hpd[5]) {
5893                 DRM_DEBUG("si_irq_set: hpd 6\n");
5894                 hpd6 |= DC_HPDx_INT_EN;
5895         }
5896
5897         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5898         WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
5899         WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
5900
5901         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
5902         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
5903
5904         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5905
5906         if (rdev->irq.dpm_thermal) {
5907                 DRM_DEBUG("dpm thermal\n");
5908                 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
5909         }
5910
5911         if (rdev->num_crtc >= 2) {
5912                 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5913                 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5914         }
5915         if (rdev->num_crtc >= 4) {
5916                 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5917                 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5918         }
5919         if (rdev->num_crtc >= 6) {
5920                 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5921                 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5922         }
5923
5924         if (rdev->num_crtc >= 2) {
5925                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
5926                        GRPH_PFLIP_INT_MASK);
5927                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
5928                        GRPH_PFLIP_INT_MASK);
5929         }
5930         if (rdev->num_crtc >= 4) {
5931                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
5932                        GRPH_PFLIP_INT_MASK);
5933                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
5934                        GRPH_PFLIP_INT_MASK);
5935         }
5936         if (rdev->num_crtc >= 6) {
5937                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
5938                        GRPH_PFLIP_INT_MASK);
5939                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
5940                        GRPH_PFLIP_INT_MASK);
5941         }
5942
5943         if (!ASIC_IS_NODCE(rdev)) {
5944                 WREG32(DC_HPD1_INT_CONTROL, hpd1);
5945                 WREG32(DC_HPD2_INT_CONTROL, hpd2);
5946                 WREG32(DC_HPD3_INT_CONTROL, hpd3);
5947                 WREG32(DC_HPD4_INT_CONTROL, hpd4);
5948                 WREG32(DC_HPD5_INT_CONTROL, hpd5);
5949                 WREG32(DC_HPD6_INT_CONTROL, hpd6);
5950         }
5951
5952         WREG32(CG_THERMAL_INT, thermal_int);
5953
5954         return 0;
5955 }
5956
5957 static inline void si_irq_ack(struct radeon_device *rdev)
5958 {
5959         u32 tmp;
5960
5961         if (ASIC_IS_NODCE(rdev))
5962                 return;
5963
5964         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5965         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5966         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5967         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5968         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5969         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5970         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
5971         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
5972         if (rdev->num_crtc >= 4) {
5973                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
5974                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
5975         }
5976         if (rdev->num_crtc >= 6) {
5977                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
5978                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
5979         }
5980
5981         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
5982                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5983         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
5984                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5985         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
5986                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5987         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
5988                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5989         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5990                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5991         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5992                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5993
5994         if (rdev->num_crtc >= 4) {
5995                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
5996                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5997                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
5998                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5999                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6000                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6001                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6002                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6003                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6004                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6005                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6006                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6007         }
6008
6009         if (rdev->num_crtc >= 6) {
6010                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6011                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6012                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6013                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6014                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6015                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6016                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6017                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6018                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6019                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6020                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6021                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6022         }
6023
6024         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6025                 tmp = RREG32(DC_HPD1_INT_CONTROL);
6026                 tmp |= DC_HPDx_INT_ACK;
6027                 WREG32(DC_HPD1_INT_CONTROL, tmp);
6028         }
6029         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6030                 tmp = RREG32(DC_HPD2_INT_CONTROL);
6031                 tmp |= DC_HPDx_INT_ACK;
6032                 WREG32(DC_HPD2_INT_CONTROL, tmp);
6033         }
6034         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6035                 tmp = RREG32(DC_HPD3_INT_CONTROL);
6036                 tmp |= DC_HPDx_INT_ACK;
6037                 WREG32(DC_HPD3_INT_CONTROL, tmp);
6038         }
6039         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6040                 tmp = RREG32(DC_HPD4_INT_CONTROL);
6041                 tmp |= DC_HPDx_INT_ACK;
6042                 WREG32(DC_HPD4_INT_CONTROL, tmp);
6043         }
6044         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6045                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6046                 tmp |= DC_HPDx_INT_ACK;
6047                 WREG32(DC_HPD5_INT_CONTROL, tmp);
6048         }
6049         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6050                 tmp = RREG32(DC_HPD6_INT_CONTROL);
6051                 tmp |= DC_HPDx_INT_ACK;
6052                 WREG32(DC_HPD6_INT_CONTROL, tmp);
6053         }
6054 }
6055
6056 static void si_irq_disable(struct radeon_device *rdev)
6057 {
6058         si_disable_interrupts(rdev);
6059         /* Wait and acknowledge irq */
6060         mdelay(1);
6061         si_irq_ack(rdev);
6062         si_disable_interrupt_state(rdev);
6063 }
6064
6065 static void si_irq_suspend(struct radeon_device *rdev)
6066 {
6067         si_irq_disable(rdev);
6068         si_rlc_stop(rdev);
6069 }
6070
6071 static void si_irq_fini(struct radeon_device *rdev)
6072 {
6073         si_irq_suspend(rdev);
6074         r600_ih_ring_fini(rdev);
6075 }
6076
6077 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6078 {
6079         u32 wptr, tmp;
6080
6081         if (rdev->wb.enabled)
6082                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6083         else
6084                 wptr = RREG32(IH_RB_WPTR);
6085
6086         if (wptr & RB_OVERFLOW) {
6087                 /* When a ring buffer overflow happens, start parsing interrupts
6088                  * from the last not overwritten vector (wptr + 16). Hopefully
6089                  * this should allow us to catch up.
6090                  */
6091                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6092                         wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6093                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6094                 tmp = RREG32(IH_RB_CNTL);
6095                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6096                 WREG32(IH_RB_CNTL, tmp);
6097         }
6098         return (wptr & rdev->ih.ptr_mask);
6099 }
6100
6101 /*        SI IV Ring
6102  * Each IV ring entry is 128 bits:
6103  * [7:0]    - interrupt source id
6104  * [31:8]   - reserved
6105  * [59:32]  - interrupt source data
6106  * [63:60]  - reserved
6107  * [71:64]  - RINGID
6108  * [79:72]  - VMID
6109  * [127:80] - reserved
6110  */
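/*
 * In terms of the 32-bit dwords read below: dw0[7:0] holds src_id,
 * dw1[27:0] holds src_data and dw2[7:0] holds ring_id; each entry occupies
 * 16 bytes of the ring.
 */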
6111 int si_irq_process(struct radeon_device *rdev)
6112 {
6113         u32 wptr;
6114         u32 rptr;
6115         u32 src_id, src_data, ring_id;
6116         u32 ring_index;
6117         bool queue_hotplug = false;
6118         bool queue_thermal = false;
6119         u32 status, addr;
6120
6121         if (!rdev->ih.enabled || rdev->shutdown)
6122                 return IRQ_NONE;
6123
6124         wptr = si_get_ih_wptr(rdev);
6125
6126 restart_ih:
6127         /* is somebody else already processing irqs? */
6128         if (atomic_xchg(&rdev->ih.lock, 1))
6129                 return IRQ_NONE;
6130
6131         rptr = rdev->ih.rptr;
6132         DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6133
6134         /* Order reading of wptr vs. reading of IH ring data */
6135         rmb();
6136
6137         /* display interrupts */
6138         si_irq_ack(rdev);
6139
6140         while (rptr != wptr) {
6141                 /* wptr/rptr are in bytes! */
6142                 ring_index = rptr / 4;
6143                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6144                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6145                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6146
6147                 switch (src_id) {
6148                 case 1: /* D1 vblank/vline */
6149                         switch (src_data) {
6150                         case 0: /* D1 vblank */
6151                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
6152                                         if (rdev->irq.crtc_vblank_int[0]) {
6153                                                 drm_handle_vblank(rdev->ddev, 0);
6154                                                 rdev->pm.vblank_sync = true;
6155                                                 wake_up(&rdev->irq.vblank_queue);
6156                                         }
6157                                         if (atomic_read(&rdev->irq.pflip[0]))
6158                                                 radeon_crtc_handle_vblank(rdev, 0);
6159                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6160                                         DRM_DEBUG("IH: D1 vblank\n");
6161                                 }
6162                                 break;
6163                         case 1: /* D1 vline */
6164                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
6165                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6166                                         DRM_DEBUG("IH: D1 vline\n");
6167                                 }
6168                                 break;
6169                         default:
6170                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6171                                 break;
6172                         }
6173                         break;
6174                 case 2: /* D2 vblank/vline */
6175                         switch (src_data) {
6176                         case 0: /* D2 vblank */
6177                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6178                                         if (rdev->irq.crtc_vblank_int[1]) {
6179                                                 drm_handle_vblank(rdev->ddev, 1);
6180                                                 rdev->pm.vblank_sync = true;
6181                                                 wake_up(&rdev->irq.vblank_queue);
6182                                         }
6183                                         if (atomic_read(&rdev->irq.pflip[1]))
6184                                                 radeon_crtc_handle_vblank(rdev, 1);
6185                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6186                                         DRM_DEBUG("IH: D2 vblank\n");
6187                                 }
6188                                 break;
6189                         case 1: /* D2 vline */
6190                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6191                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6192                                         DRM_DEBUG("IH: D2 vline\n");
6193                                 }
6194                                 break;
6195                         default:
6196                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6197                                 break;
6198                         }
6199                         break;
6200                 case 3: /* D3 vblank/vline */
6201                         switch (src_data) {
6202                         case 0: /* D3 vblank */
6203                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6204                                         if (rdev->irq.crtc_vblank_int[2]) {
6205                                                 drm_handle_vblank(rdev->ddev, 2);
6206                                                 rdev->pm.vblank_sync = true;
6207                                                 wake_up(&rdev->irq.vblank_queue);
6208                                         }
6209                                         if (atomic_read(&rdev->irq.pflip[2]))
6210                                                 radeon_crtc_handle_vblank(rdev, 2);
6211                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6212                                         DRM_DEBUG("IH: D3 vblank\n");
6213                                 }
6214                                 break;
6215                         case 1: /* D3 vline */
6216                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6217                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6218                                         DRM_DEBUG("IH: D3 vline\n");
6219                                 }
6220                                 break;
6221                         default:
6222                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6223                                 break;
6224                         }
6225                         break;
6226                 case 4: /* D4 vblank/vline */
6227                         switch (src_data) {
6228                         case 0: /* D4 vblank */
6229                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6230                                         if (rdev->irq.crtc_vblank_int[3]) {
6231                                                 drm_handle_vblank(rdev->ddev, 3);
6232                                                 rdev->pm.vblank_sync = true;
6233                                                 wake_up(&rdev->irq.vblank_queue);
6234                                         }
6235                                         if (atomic_read(&rdev->irq.pflip[3]))
6236                                                 radeon_crtc_handle_vblank(rdev, 3);
6237                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6238                                         DRM_DEBUG("IH: D4 vblank\n");
6239                                 }
6240                                 break;
6241                         case 1: /* D4 vline */
6242                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6243                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6244                                         DRM_DEBUG("IH: D4 vline\n");
6245                                 }
6246                                 break;
6247                         default:
6248                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6249                                 break;
6250                         }
6251                         break;
6252                 case 5: /* D5 vblank/vline */
6253                         switch (src_data) {
6254                         case 0: /* D5 vblank */
6255                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6256                                         if (rdev->irq.crtc_vblank_int[4]) {
6257                                                 drm_handle_vblank(rdev->ddev, 4);
6258                                                 rdev->pm.vblank_sync = true;
6259                                                 wake_up(&rdev->irq.vblank_queue);
6260                                         }
6261                                         if (atomic_read(&rdev->irq.pflip[4]))
6262                                                 radeon_crtc_handle_vblank(rdev, 4);
6263                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6264                                         DRM_DEBUG("IH: D5 vblank\n");
6265                                 }
6266                                 break;
6267                         case 1: /* D5 vline */
6268                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6269                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6270                                         DRM_DEBUG("IH: D5 vline\n");
6271                                 }
6272                                 break;
6273                         default:
6274                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6275                                 break;
6276                         }
6277                         break;
6278                 case 6: /* D6 vblank/vline */
6279                         switch (src_data) {
6280                         case 0: /* D6 vblank */
6281                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6282                                         if (rdev->irq.crtc_vblank_int[5]) {
6283                                                 drm_handle_vblank(rdev->ddev, 5);
6284                                                 rdev->pm.vblank_sync = true;
6285                                                 wake_up(&rdev->irq.vblank_queue);
6286                                         }
6287                                         if (atomic_read(&rdev->irq.pflip[5]))
6288                                                 radeon_crtc_handle_vblank(rdev, 5);
6289                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6290                                         DRM_DEBUG("IH: D6 vblank\n");
6291                                 }
6292                                 break;
6293                         case 1: /* D6 vline */
6294                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6295                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6296                                         DRM_DEBUG("IH: D6 vline\n");
6297                                 }
6298                                 break;
6299                         default:
6300                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6301                                 break;
6302                         }
6303                         break;
6304                 case 8: /* D1 page flip */
6305                 case 10: /* D2 page flip */
6306                 case 12: /* D3 page flip */
6307                 case 14: /* D4 page flip */
6308                 case 16: /* D5 page flip */
6309                 case 18: /* D6 page flip */
6310                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6311                         radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6312                         break;
6313                 case 42: /* HPD hotplug */
6314                         switch (src_data) {
6315                         case 0:
6316                                 if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6317                                         rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6318                                         queue_hotplug = true;
6319                                         DRM_DEBUG("IH: HPD1\n");
6320                                 }
6321                                 break;
6322                         case 1:
6323                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6324                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6325                                         queue_hotplug = true;
6326                                         DRM_DEBUG("IH: HPD2\n");
6327                                 }
6328                                 break;
6329                         case 2:
6330                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6331                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6332                                         queue_hotplug = true;
6333                                         DRM_DEBUG("IH: HPD3\n");
6334                                 }
6335                                 break;
6336                         case 3:
6337                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6338                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6339                                         queue_hotplug = true;
6340                                         DRM_DEBUG("IH: HPD4\n");
6341                                 }
6342                                 break;
6343                         case 4:
6344                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6345                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6346                                         queue_hotplug = true;
6347                                         DRM_DEBUG("IH: HPD5\n");
6348                                 }
6349                                 break;
6350                         case 5:
6351                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6352                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6353                                         queue_hotplug = true;
6354                                         DRM_DEBUG("IH: HPD6\n");
6355                                 }
6356                                 break;
6357                         default:
6358                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6359                                 break;
6360                         }
6361                         break;
6362                 case 124: /* UVD */
6363                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6364                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6365                         break;
6366                 case 146:
6367                 case 147:
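                             /* VM protection fault: decode it and reset the fault registers */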
6368                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6369                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6370                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6371                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6372                                 addr);
6373                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6374                                 status);
6375                         si_vm_decode_fault(rdev, status, addr);
6376                         /* reset addr and status */
6377                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6378                         break;
6379                 case 176: /* RINGID0 CP_INT */
6380                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6381                         break;
6382                 case 177: /* RINGID1 CP_INT */
6383                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6384                         break;
6385                 case 178: /* RINGID2 CP_INT */
6386                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6387                         break;
6388                 case 181: /* CP EOP event */
6389                         DRM_DEBUG("IH: CP EOP\n");
6390                         switch (ring_id) {
6391                         case 0:
6392                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6393                                 break;
6394                         case 1:
6395                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6396                                 break;
6397                         case 2:
6398                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6399                                 break;
6400                         }
6401                         break;
6402                 case 224: /* DMA trap event */
6403                         DRM_DEBUG("IH: DMA trap\n");
6404                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6405                         break;
6406                 case 230: /* thermal low to high */
6407                         DRM_DEBUG("IH: thermal low to high\n");
6408                         rdev->pm.dpm.thermal.high_to_low = false;
6409                         queue_thermal = true;
6410                         break;
6411                 case 231: /* thermal high to low */
6412                         DRM_DEBUG("IH: thermal high to low\n");
6413                         rdev->pm.dpm.thermal.high_to_low = true;
6414                         queue_thermal = true;
6415                         break;
6416                 case 233: /* GUI IDLE */
6417                         DRM_DEBUG("IH: GUI idle\n");
6418                         break;
6419                 case 244: /* DMA1 trap event */
6420                         DRM_DEBUG("IH: DMA1 trap\n");
6421                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6422                         break;
6423                 default:
6424                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6425                         break;
6426                 }
6427
6428                 /* wptr/rptr are in bytes! */
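                     /* each IH ring entry is 16 bytes (4 dwords), so step one entry and wrap */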
6429                 rptr += 16;
6430                 rptr &= rdev->ih.ptr_mask;
6431         }
6432         if (queue_hotplug)
6433                 schedule_work(&rdev->hotplug_work);
6434         if (queue_thermal && rdev->pm.dpm_enabled)
6435                 schedule_work(&rdev->pm.dpm.thermal.work);
6436         rdev->ih.rptr = rptr;
6437         WREG32(IH_RB_RPTR, rdev->ih.rptr);
6438         atomic_set(&rdev->ih.lock, 0);
6439
6440         /* make sure wptr hasn't changed while processing */
6441         wptr = si_get_ih_wptr(rdev);
6442         if (wptr != rptr)
6443                 goto restart_ih;
6444
6445         return IRQ_HANDLED;
6446 }
6447
6448 /*
6449  * startup/shutdown callbacks
6450  */
6451 static int si_startup(struct radeon_device *rdev)
6452 {
6453         struct radeon_ring *ring;
6454         int r;
6455
6456         /* enable pcie gen2/3 link */
6457         si_pcie_gen3_enable(rdev);
6458         /* enable aspm */
6459         si_program_aspm(rdev);
6460
6461         /* scratch needs to be initialized before MC */
6462         r = r600_vram_scratch_init(rdev);
6463         if (r)
6464                 return r;
6465
6466         si_mc_program(rdev);
6467
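             /*
              * The MC firmware only needs to be loaded directly here when DPM
              * is disabled; with DPM enabled it is presumably handled by the
              * DPM bring-up path.
              */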
6468         if (!rdev->pm.dpm_enabled) {
6469                 r = si_mc_load_microcode(rdev);
6470                 if (r) {
6471                         DRM_ERROR("Failed to load MC firmware!\n");
6472                         return r;
6473                 }
6474         }
6475
6476         r = si_pcie_gart_enable(rdev);
6477         if (r)
6478                 return r;
6479         si_gpu_init(rdev);
6480
6481         /* allocate rlc buffers */
6482         if (rdev->family == CHIP_VERDE) {
6483                 rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
6484                 rdev->rlc.reg_list_size =
6485                         (u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
6486         }
6487         rdev->rlc.cs_data = si_cs_data;
6488         r = sumo_rlc_init(rdev);
6489         if (r) {
6490                 DRM_ERROR("Failed to init rlc BOs!\n");
6491                 return r;
6492         }
6493
6494         /* allocate wb buffer */
6495         r = radeon_wb_init(rdev);
6496         if (r)
6497                 return r;
6498
6499         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6500         if (r) {
6501                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6502                 return r;
6503         }
6504
6505         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6506         if (r) {
6507                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6508                 return r;
6509         }
6510
6511         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6512         if (r) {
6513                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6514                 return r;
6515         }
6516
6517         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6518         if (r) {
6519                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6520                 return r;
6521         }
6522
6523         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6524         if (r) {
6525                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6526                 return r;
6527         }
6528
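             /*
              * UVD is optional: if resume or fence setup fails, disable the
              * UVD ring (ring_size = 0) rather than failing the whole startup.
              */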
6529         if (rdev->has_uvd) {
6530                 r = uvd_v2_2_resume(rdev);
6531                 if (!r) {
6532                         r = radeon_fence_driver_start_ring(rdev,
6533                                                            R600_RING_TYPE_UVD_INDEX);
6534                         if (r)
6535                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6536                 }
6537                 if (r)
6538                         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6539         }
6540
6541         /* Enable IRQ */
6542         if (!rdev->irq.installed) {
6543                 r = radeon_irq_kms_init(rdev);
6544                 if (r)
6545                         return r;
6546         }
6547
6548         r = si_irq_init(rdev);
6549         if (r) {
6550                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6551                 radeon_irq_kms_fini(rdev);
6552                 return r;
6553         }
6554         si_irq_set(rdev);
6555
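             /*
              * Bring up the CP and DMA rings.  Each ring gets its own rptr
              * write-back offset and a NOP packet used for padding.
              */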
6556         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6557         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6558                              RADEON_CP_PACKET2);
6559         if (r)
6560                 return r;
6561
6562         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6563         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6564                              RADEON_CP_PACKET2);
6565         if (r)
6566                 return r;
6567
6568         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6569         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6570                              RADEON_CP_PACKET2);
6571         if (r)
6572                 return r;
6573
6574         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6575         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6576                              DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6577         if (r)
6578                 return r;
6579
6580         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6581         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6582                              DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6583         if (r)
6584                 return r;
6585
6586         r = si_cp_load_microcode(rdev);
6587         if (r)
6588                 return r;
6589         r = si_cp_resume(rdev);
6590         if (r)
6591                 return r;
6592
6593         r = cayman_dma_resume(rdev);
6594         if (r)
6595                 return r;
6596
6597         if (rdev->has_uvd) {
6598                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6599                 if (ring->ring_size) {
6600                         r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
6601                                              RADEON_CP_PACKET2);
6602                         if (!r)
6603                                 r = uvd_v1_0_init(rdev);
6604                         if (r)
6605                                 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6606                 }
6607         }
6608
6609         r = radeon_ib_pool_init(rdev);
6610         if (r) {
6611                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6612                 return r;
6613         }
6614
6615         r = radeon_vm_manager_init(rdev);
6616         if (r) {
6617                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6618                 return r;
6619         }
6620
6621         r = dce6_audio_init(rdev);
6622         if (r)
6623                 return r;
6624
6625         return 0;
6626 }
6627
6628 int si_resume(struct radeon_device *rdev)
6629 {
6630         int r;
6631
6632         /* Do not reset the GPU before posting; on this hw, unlike on r500
6633          * hw, posting performs the tasks needed to bring the GPU back into
6634          * a good state.
6635          */
6636         /* post card */
6637         atom_asic_init(rdev->mode_info.atom_context);
6638
6639         /* init golden registers */
6640         si_init_golden_registers(rdev);
6641
6642         if (rdev->pm.pm_method == PM_METHOD_DPM)
6643                 radeon_pm_resume(rdev);
6644
6645         rdev->accel_working = true;
6646         r = si_startup(rdev);
6647         if (r) {
6648                 DRM_ERROR("si startup failed on resume\n");
6649                 rdev->accel_working = false;
6650                 return r;
6651         }
6652
6653         return r;
6654
6655 }
6656
6657 int si_suspend(struct radeon_device *rdev)
6658 {
6659         radeon_pm_suspend(rdev);
6660         dce6_audio_fini(rdev);
6661         radeon_vm_manager_fini(rdev);
6662         si_cp_enable(rdev, false);
6663         cayman_dma_stop(rdev);
6664         if (rdev->has_uvd) {
6665                 uvd_v1_0_fini(rdev);
6666                 radeon_uvd_suspend(rdev);
6667         }
6668         si_fini_pg(rdev);
6669         si_fini_cg(rdev);
6670         si_irq_suspend(rdev);
6671         radeon_wb_disable(rdev);
6672         si_pcie_gart_disable(rdev);
6673         return 0;
6674 }
6675
6676 /* The plan is to move initialization into this function and use
6677  * helper functions so that radeon_device_init does little more
6678  * than call the ASIC-specific functions.
6679  * This should also allow removing a number of callback functions
6680  * such as vram_info.
6681  */
6682 int si_init(struct radeon_device *rdev)
6683 {
6684         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6685         int r;
6686
6687         /* Read BIOS */
6688         if (!radeon_get_bios(rdev)) {
6689                 if (ASIC_IS_AVIVO(rdev))
6690                         return -EINVAL;
6691         }
6692         /* Must be an ATOMBIOS */
6693         if (!rdev->is_atom_bios) {
6694                 dev_err(rdev->dev, "Expecting atombios for SI GPU\n");
6695                 return -EINVAL;
6696         }
6697         r = radeon_atombios_init(rdev);
6698         if (r)
6699                 return r;
6700
6701         /* Post card if necessary */
6702         if (!radeon_card_posted(rdev)) {
6703                 if (!rdev->bios) {
6704                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6705                         return -EINVAL;
6706                 }
6707                 DRM_INFO("GPU not posted. posting now...\n");
6708                 atom_asic_init(rdev->mode_info.atom_context);
6709         }
6710         /* init golden registers */
6711         si_init_golden_registers(rdev);
6712         /* Initialize scratch registers */
6713         si_scratch_init(rdev);
6714         /* Initialize surface registers */
6715         radeon_surface_init(rdev);
6716         /* Initialize clocks */
6717         radeon_get_clock_info(rdev->ddev);
6718
6719         /* Fence driver */
6720         r = radeon_fence_driver_init(rdev);
6721         if (r)
6722                 return r;
6723
6724         /* initialize memory controller */
6725         r = si_mc_init(rdev);
6726         if (r)
6727                 return r;
6728         /* Memory manager */
6729         r = radeon_bo_init(rdev);
6730         if (r)
6731                 return r;
6732
6733         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6734             !rdev->rlc_fw || !rdev->mc_fw) {
6735                 r = si_init_microcode(rdev);
6736                 if (r) {
6737                         DRM_ERROR("Failed to load firmware!\n");
6738                         return r;
6739                 }
6740         }
6741
6742         /* Initialize power management */
6743         radeon_pm_init(rdev);
6744
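             /* default ring sizes: 1 MB for the GFX/compute CP rings, 64 KB for the DMA rings */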
6745         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6746         ring->ring_obj = NULL;
6747         r600_ring_init(rdev, ring, 1024 * 1024);
6748
6749         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6750         ring->ring_obj = NULL;
6751         r600_ring_init(rdev, ring, 1024 * 1024);
6752
6753         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6754         ring->ring_obj = NULL;
6755         r600_ring_init(rdev, ring, 1024 * 1024);
6756
6757         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6758         ring->ring_obj = NULL;
6759         r600_ring_init(rdev, ring, 64 * 1024);
6760
6761         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6762         ring->ring_obj = NULL;
6763         r600_ring_init(rdev, ring, 64 * 1024);
6764
6765         if (rdev->has_uvd) {
6766                 r = radeon_uvd_init(rdev);
6767                 if (!r) {
6768                         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6769                         ring->ring_obj = NULL;
6770                         r600_ring_init(rdev, ring, 4096);
6771                 }
6772         }
6773
6774         rdev->ih.ring_obj = NULL;
6775         r600_ih_ring_init(rdev, 64 * 1024);
6776
6777         r = r600_pcie_gart_init(rdev);
6778         if (r)
6779                 return r;
6780
6781         rdev->accel_working = true;
6782         r = si_startup(rdev);
6783         if (r) {
6784                 dev_err(rdev->dev, "disabling GPU acceleration\n");
6785                 si_cp_fini(rdev);
6786                 cayman_dma_fini(rdev);
6787                 si_irq_fini(rdev);
6788                 sumo_rlc_fini(rdev);
6789                 radeon_wb_fini(rdev);
6790                 radeon_ib_pool_fini(rdev);
6791                 radeon_vm_manager_fini(rdev);
6792                 radeon_irq_kms_fini(rdev);
6793                 si_pcie_gart_fini(rdev);
6794                 rdev->accel_working = false;
6795         }
6796
6797         /* Don't start up if the MC ucode is missing.
6798          * The default clocks and voltages before the MC ucode
6799          * is loaded are not sufficient for advanced operations.
6800          */
6801         if (!rdev->mc_fw) {
6802                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6803                 return -EINVAL;
6804         }
6805
6806         return 0;
6807 }
6808
6809 void si_fini(struct radeon_device *rdev)
6810 {
6811         radeon_pm_fini(rdev);
6812         si_cp_fini(rdev);
6813         cayman_dma_fini(rdev);
6814         si_fini_pg(rdev);
6815         si_fini_cg(rdev);
6816         si_irq_fini(rdev);
6817         sumo_rlc_fini(rdev);
6818         radeon_wb_fini(rdev);
6819         radeon_vm_manager_fini(rdev);
6820         radeon_ib_pool_fini(rdev);
6821         radeon_irq_kms_fini(rdev);
6822         if (rdev->has_uvd) {
6823                 uvd_v1_0_fini(rdev);
6824                 radeon_uvd_fini(rdev);
6825         }
6826         si_pcie_gart_fini(rdev);
6827         r600_vram_scratch_fini(rdev);
6828         radeon_gem_fini(rdev);
6829         radeon_fence_driver_fini(rdev);
6830         radeon_bo_fini(rdev);
6831         radeon_atombios_fini(rdev);
6832         kfree(rdev->bios);
6833         rdev->bios = NULL;
6834 }
6835
6836 /**
6837  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6838  *
6839  * @rdev: radeon_device pointer
6840  *
6841  * Fetches a GPU clock counter snapshot (SI).
6842  * Returns the 64-bit clock counter snapshot.
6843  */
6844 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6845 {
6846         uint64_t clock;
6847
6848         mutex_lock(&rdev->gpu_clock_mutex);
6849         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6850         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6851                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6852         mutex_unlock(&rdev->gpu_clock_mutex);
6853         return clock;
6854 }
6855
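     /**
      * si_set_uvd_clocks - program the UVD clocks via the UPLL
      *
      * @rdev: radeon_device pointer
      * @vclk: requested UVD video clock
      * @dclk: requested UVD decode clock
      *
      * Bypasses VCLK/DCLK onto bclk, reprograms the UPLL dividers and then
      * switches back to the PLL outputs (SI).  If either clock is zero the
      * UPLL is left in bypass mode and put to sleep.
      * Returns 0 on success, error on failure.
      */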
6856 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6857 {
6858         unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
6859         int r;
6860
6861         /* bypass vclk and dclk with bclk */
6862         WREG32_P(CG_UPLL_FUNC_CNTL_2,
6863                 VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
6864                 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
6865
6866         /* put PLL in bypass mode */
6867         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
6868
6869         if (!vclk || !dclk) {
6870                 /* keep the Bypass mode, put PLL to sleep */
6871                 WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
6872                 return 0;
6873         }
6874
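             /*
              * Compute the UPLL feedback and post dividers; the fixed
              * arguments presumably give the allowed VCO range and divider
              * limits (clock values appear to be in 10 kHz units).
              */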
6875         r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
6876                                           16384, 0x03FFFFFF, 0, 128, 5,
6877                                           &fb_div, &vclk_div, &dclk_div);
6878         if (r)
6879                 return r;
6880
6881         /* set RESET_ANTI_MUX to 0 */
6882         WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
6883
6884         /* set VCO_MODE to 1 */
6885         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
6886
6887         /* toggle UPLL_SLEEP to 1 then back to 0 */
6888         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
6889         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
6890
6891         /* deassert UPLL_RESET */
6892         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
6893
6894         mdelay(1);
6895
6896         r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
6897         if (r)
6898                 return r;
6899
6900         /* assert UPLL_RESET again */
6901         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
6902
6903         /* disable spread spectrum. */
6904         WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
6905
6906         /* set feedback divider */
6907         WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
6908
6909         /* set ref divider to 0 */
6910         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
6911
6912         if (fb_div < 307200)
6913                 WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
6914         else
6915                 WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
6916
6917         /* set PDIV_A and PDIV_B */
6918         WREG32_P(CG_UPLL_FUNC_CNTL_2,
6919                 UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
6920                 ~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
6921
6922         /* give the PLL some time to settle */
6923         mdelay(15);
6924
6925         /* deassert PLL_RESET */
6926         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
6927
6928         mdelay(15);
6929
6930         /* switch from bypass mode to normal mode */
6931         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
6932
6933         r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
6934         if (r)
6935                 return r;
6936
6937         /* switch VCLK and DCLK selection */
6938         WREG32_P(CG_UPLL_FUNC_CNTL_2,
6939                 VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
6940                 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
6941
6942         mdelay(100);
6943
6944         return 0;
6945 }
6946
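     /*
      * Try to raise the PCIe link to gen2/gen3 speeds when both the GPU and
      * the upstream bridge support it (disabled with radeon.pcie_gen2=0).
      * For gen3, link equalization is redone before the speed change is
      * requested.
      */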
6947 static void si_pcie_gen3_enable(struct radeon_device *rdev)
6948 {
6949         struct pci_dev *root = rdev->pdev->bus->self;
6950         int bridge_pos, gpu_pos;
6951         u32 speed_cntl, mask, current_data_rate;
6952         int ret, i;
6953         u16 tmp16;
6954
6955         if (radeon_pcie_gen2 == 0)
6956                 return;
6957
6958         if (rdev->flags & RADEON_IS_IGP)
6959                 return;
6960
6961         if (!(rdev->flags & RADEON_IS_PCIE))
6962                 return;
6963
6964         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
6965         if (ret != 0)
6966                 return;
6967
6968         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
6969                 return;
6970
6971         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
6972         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
6973                 LC_CURRENT_DATA_RATE_SHIFT;
6974         if (mask & DRM_PCIE_SPEED_80) {
6975                 if (current_data_rate == 2) {
6976                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
6977                         return;
6978                 }
6979                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
6980         } else if (mask & DRM_PCIE_SPEED_50) {
6981                 if (current_data_rate == 1) {
6982                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
6983                         return;
6984                 }
6985                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
6986         }
6987
6988         bridge_pos = pci_pcie_cap(root);
6989         if (!bridge_pos)
6990                 return;
6991
6992         gpu_pos = pci_pcie_cap(rdev->pdev);
6993         if (!gpu_pos)
6994                 return;
6995
6996         if (mask & DRM_PCIE_SPEED_80) {
6997                 /* re-try equalization if gen3 is not already enabled */
6998                 if (current_data_rate != 2) {
6999                         u16 bridge_cfg, gpu_cfg;
7000                         u16 bridge_cfg2, gpu_cfg2;
7001                         u32 max_lw, current_lw, tmp;
7002
7003                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7004                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7005
7006                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7007                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7008
7009                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7010                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7011
7012                         tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7013                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7014                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7015
7016                         if (current_lw < max_lw) {
7017                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7018                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
7019                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7020                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7021                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7022                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7023                                 }
7024                         }
7025
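                             /*
                              * Retry link equalization up to 10 times; each pass
                              * toggles LC_SET_QUIESCE/LC_REDO_EQ and then restores
                              * the saved bridge and GPU link control settings.
                              */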
7026                         for (i = 0; i < 10; i++) {
7027                                 /* check status */
7028                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
7029                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7030                                         break;
7031
7032                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7033                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7034
7035                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
7036                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
7037
7038                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7039                                 tmp |= LC_SET_QUIESCE;
7040                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7041
7042                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7043                                 tmp |= LC_REDO_EQ;
7044                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7045
7046                                 mdelay(100);
7047
7048                                 /* linkctl */
7049                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
7050                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7051                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7052                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7053
7054                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7055                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7056                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7057                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7058
7059                                 /* linkctl2 */
7060                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7061                                 tmp16 &= ~((1 << 4) | (7 << 9));
7062                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
7063                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
7064
7065                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7066                                 tmp16 &= ~((1 << 4) | (7 << 9));
7067                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
7068                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7069
7070                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7071                                 tmp &= ~LC_SET_QUIESCE;
7072                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7073                         }
7074                 }
7075         }
7076
7077         /* set the link speed */
7078         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7079         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7080         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7081
7082         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7083         tmp16 &= ~0xf;
7084         if (mask & DRM_PCIE_SPEED_80)
7085                 tmp16 |= 3; /* gen3 */
7086         else if (mask & DRM_PCIE_SPEED_50)
7087                 tmp16 |= 2; /* gen2 */
7088         else
7089                 tmp16 |= 1; /* gen1 */
7090         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7091
7092         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7093         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7094         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7095
7096         for (i = 0; i < rdev->usec_timeout; i++) {
7097                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7098                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7099                         break;
7100                 udelay(1);
7101         }
7102 }
7103
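     /*
      * Program PCIe ASPM: conditionally enable the L0s/L1 inactivity timers
      * and, when L1 is enabled, let the PCIe PHY PLLs power down in L1
      * (disabled with radeon.aspm=0).
      */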
7104 static void si_program_aspm(struct radeon_device *rdev)
7105 {
7106         u32 data, orig;
7107         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7108         bool disable_clkreq = false;
7109
7110         if (radeon_aspm == 0)
7111                 return;
7112
7113         if (!(rdev->flags & RADEON_IS_PCIE))
7114                 return;
7115
7116         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7117         data &= ~LC_XMIT_N_FTS_MASK;
7118         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7119         if (orig != data)
7120                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7121
7122         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7123         data |= LC_GO_TO_RECOVERY;
7124         if (orig != data)
7125                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7126
7127         orig = data = RREG32_PCIE(PCIE_P_CNTL);
7128         data |= P_IGNORE_EDB_ERR;
7129         if (orig != data)
7130                 WREG32_PCIE(PCIE_P_CNTL, data);
7131
7132         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7133         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7134         data |= LC_PMI_TO_L1_DIS;
7135         if (!disable_l0s)
7136                 data |= LC_L0S_INACTIVITY(7);
7137
7138         if (!disable_l1) {
7139                 data |= LC_L1_INACTIVITY(7);
7140                 data &= ~LC_PMI_TO_L1_DIS;
7141                 if (orig != data)
7142                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7143
7144                 if (!disable_plloff_in_l1) {
7145                         bool clk_req_support;
7146
7147                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7148                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7149                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7150                         if (orig != data)
7151                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7152
7153                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7154                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7155                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7156                         if (orig != data)
7157                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7158
7159                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7160                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7161                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7162                         if (orig != data)
7163                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7164
7165                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7166                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7167                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7168                         if (orig != data)
7169                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7170
7171                         if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7172                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7173                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7174                                 if (orig != data)
7175                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7176
7177                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7178                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7179                                 if (orig != data)
7180                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7181
7182                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7183                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7184                                 if (orig != data)
7185                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7186
7187                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7188                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7189                                 if (orig != data)
7190                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7191
7192                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7193                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7194                                 if (orig != data)
7195                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7196
7197                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7198                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7199                                 if (orig != data)
7200                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7201
7202                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7203                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7204                                 if (orig != data)
7205                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7206
7207                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7208                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7209                                 if (orig != data)
7210                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7211                         }
7212                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7213                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7214                         data |= LC_DYN_LANES_PWR_STATE(3);
7215                         if (orig != data)
7216                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7217
7218                         orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7219                         data &= ~LS2_EXIT_TIME_MASK;
7220                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7221                                 data |= LS2_EXIT_TIME(5);
7222                         if (orig != data)
7223                                 WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7224
7225                         orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7226                         data &= ~LS2_EXIT_TIME_MASK;
7227                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7228                                 data |= LS2_EXIT_TIME(5);
7229                         if (orig != data)
7230                                 WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7231
7232                         if (!disable_clkreq) {
7233                                 struct pci_dev *root = rdev->pdev->bus->self;
7234                                 u32 lnkcap;
7235
7236                                 clk_req_support = false;
7237                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7238                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7239                                         clk_req_support = true;
7240                         } else {
7241                                 clk_req_support = false;
7242                         }
7243
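                             /*
                              * CLKREQ# is supported: allow PLL power-down in L1/L2/L3
                              * and, presumably, move the thermal/deep-sleep/bypass
                              * clocks off the PCIe reference clock, which may be
                              * gated while CLKREQ# is asserted.
                              */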
7244                         if (clk_req_support) {
7245                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7246                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7247                                 if (orig != data)
7248                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7249
7250                                 orig = data = RREG32(THM_CLK_CNTL);
7251                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7252                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7253                                 if (orig != data)
7254                                         WREG32(THM_CLK_CNTL, data);
7255
7256                                 orig = data = RREG32(MISC_CLK_CNTL);
7257                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7258                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7259                                 if (orig != data)
7260                                         WREG32(MISC_CLK_CNTL, data);
7261
7262                                 orig = data = RREG32(CG_CLKPIN_CNTL);
7263                                 data &= ~BCLK_AS_XCLK;
7264                                 if (orig != data)
7265                                         WREG32(CG_CLKPIN_CNTL, data);
7266
7267                                 orig = data = RREG32(CG_CLKPIN_CNTL_2);
7268                                 data &= ~FORCE_BIF_REFCLK_EN;
7269                                 if (orig != data)
7270                                         WREG32(CG_CLKPIN_CNTL_2, data);
7271
7272                                 orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7273                                 data &= ~MPLL_CLKOUT_SEL_MASK;
7274                                 data |= MPLL_CLKOUT_SEL(4);
7275                                 if (orig != data)
7276                                         WREG32(MPLL_BYPASSCLK_SEL, data);
7277
7278                                 orig = data = RREG32(SPLL_CNTL_MODE);
7279                                 data &= ~SPLL_REFCLK_SEL_MASK;
7280                                 if (orig != data)
7281                                         WREG32(SPLL_CNTL_MODE, data);
7282                         }
7283                 }
7284         } else {
7285                 if (orig != data)
7286                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7287         }
7288
7289         orig = data = RREG32_PCIE(PCIE_CNTL2);
7290         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7291         if (orig != data)
7292                 WREG32_PCIE(PCIE_CNTL2, data);
7293
7294         if (!disable_l0s) {
7295                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7296                 if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7297                         data = RREG32_PCIE(PCIE_LC_STATUS1);
7298                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7299                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7300                                 data &= ~LC_L0S_INACTIVITY_MASK;
7301                                 if (orig != data)
7302                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7303                         }
7304                 }
7305         }
7306 }