/*
 * drm/radeon: initialize save/restore buffer for pg on verde
 * (drivers/gpu/drm/radeon/si.c)
 */
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/platform_device.h>
26 #include <linux/slab.h>
27 #include <linux/module.h>
28 #include <drm/drmP.h>
29 #include "radeon.h"
30 #include "radeon_asic.h"
31 #include <drm/radeon_drm.h>
32 #include "sid.h"
33 #include "atom.h"
34 #include "si_blit_shaders.h"
35
36 #define SI_PFP_UCODE_SIZE 2144
37 #define SI_PM4_UCODE_SIZE 2144
38 #define SI_CE_UCODE_SIZE 2144
39 #define SI_RLC_UCODE_SIZE 2048
40 #define SI_MC_UCODE_SIZE 7769
41 #define OLAND_MC_UCODE_SIZE 7863
42
43 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
44 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
45 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
46 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
47 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
48 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
49 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
50 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
51 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
52 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
53 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
54 MODULE_FIRMWARE("radeon/VERDE_me.bin");
55 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
56 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
57 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
58 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
59 MODULE_FIRMWARE("radeon/OLAND_me.bin");
60 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
61 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
62 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
63 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
64 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
65 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
66 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
67 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
68
69 static void si_pcie_gen3_enable(struct radeon_device *rdev);
70 static void si_program_aspm(struct radeon_device *rdev);
71 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
72 extern void r600_ih_ring_fini(struct radeon_device *rdev);
73 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
74 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
75 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
76 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
77 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
78 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
79
80 static const u32 verde_rlc_save_restore_register_list[] =
81 {
82         (0x8000 << 16) | (0x98f4 >> 2),
83         0x00000000,
84         (0x8040 << 16) | (0x98f4 >> 2),
85         0x00000000,
86         (0x8000 << 16) | (0xe80 >> 2),
87         0x00000000,
88         (0x8040 << 16) | (0xe80 >> 2),
89         0x00000000,
90         (0x8000 << 16) | (0x89bc >> 2),
91         0x00000000,
92         (0x8040 << 16) | (0x89bc >> 2),
93         0x00000000,
94         (0x8000 << 16) | (0x8c1c >> 2),
95         0x00000000,
96         (0x8040 << 16) | (0x8c1c >> 2),
97         0x00000000,
98         (0x9c00 << 16) | (0x98f0 >> 2),
99         0x00000000,
100         (0x9c00 << 16) | (0xe7c >> 2),
101         0x00000000,
102         (0x8000 << 16) | (0x9148 >> 2),
103         0x00000000,
104         (0x8040 << 16) | (0x9148 >> 2),
105         0x00000000,
106         (0x9c00 << 16) | (0x9150 >> 2),
107         0x00000000,
108         (0x9c00 << 16) | (0x897c >> 2),
109         0x00000000,
110         (0x9c00 << 16) | (0x8d8c >> 2),
111         0x00000000,
112         (0x9c00 << 16) | (0xac54 >> 2),
113         0X00000000,
114         0x3,
115         (0x9c00 << 16) | (0x98f8 >> 2),
116         0x00000000,
117         (0x9c00 << 16) | (0x9910 >> 2),
118         0x00000000,
119         (0x9c00 << 16) | (0x9914 >> 2),
120         0x00000000,
121         (0x9c00 << 16) | (0x9918 >> 2),
122         0x00000000,
123         (0x9c00 << 16) | (0x991c >> 2),
124         0x00000000,
125         (0x9c00 << 16) | (0x9920 >> 2),
126         0x00000000,
127         (0x9c00 << 16) | (0x9924 >> 2),
128         0x00000000,
129         (0x9c00 << 16) | (0x9928 >> 2),
130         0x00000000,
131         (0x9c00 << 16) | (0x992c >> 2),
132         0x00000000,
133         (0x9c00 << 16) | (0x9930 >> 2),
134         0x00000000,
135         (0x9c00 << 16) | (0x9934 >> 2),
136         0x00000000,
137         (0x9c00 << 16) | (0x9938 >> 2),
138         0x00000000,
139         (0x9c00 << 16) | (0x993c >> 2),
140         0x00000000,
141         (0x9c00 << 16) | (0x9940 >> 2),
142         0x00000000,
143         (0x9c00 << 16) | (0x9944 >> 2),
144         0x00000000,
145         (0x9c00 << 16) | (0x9948 >> 2),
146         0x00000000,
147         (0x9c00 << 16) | (0x994c >> 2),
148         0x00000000,
149         (0x9c00 << 16) | (0x9950 >> 2),
150         0x00000000,
151         (0x9c00 << 16) | (0x9954 >> 2),
152         0x00000000,
153         (0x9c00 << 16) | (0x9958 >> 2),
154         0x00000000,
155         (0x9c00 << 16) | (0x995c >> 2),
156         0x00000000,
157         (0x9c00 << 16) | (0x9960 >> 2),
158         0x00000000,
159         (0x9c00 << 16) | (0x9964 >> 2),
160         0x00000000,
161         (0x9c00 << 16) | (0x9968 >> 2),
162         0x00000000,
163         (0x9c00 << 16) | (0x996c >> 2),
164         0x00000000,
165         (0x9c00 << 16) | (0x9970 >> 2),
166         0x00000000,
167         (0x9c00 << 16) | (0x9974 >> 2),
168         0x00000000,
169         (0x9c00 << 16) | (0x9978 >> 2),
170         0x00000000,
171         (0x9c00 << 16) | (0x997c >> 2),
172         0x00000000,
173         (0x9c00 << 16) | (0x9980 >> 2),
174         0x00000000,
175         (0x9c00 << 16) | (0x9984 >> 2),
176         0x00000000,
177         (0x9c00 << 16) | (0x9988 >> 2),
178         0x00000000,
179         (0x9c00 << 16) | (0x998c >> 2),
180         0x00000000,
181         (0x9c00 << 16) | (0x8c00 >> 2),
182         0x00000000,
183         (0x9c00 << 16) | (0x8c14 >> 2),
184         0x00000000,
185         (0x9c00 << 16) | (0x8c04 >> 2),
186         0x00000000,
187         (0x9c00 << 16) | (0x8c08 >> 2),
188         0x00000000,
189         (0x8000 << 16) | (0x9b7c >> 2),
190         0x00000000,
191         (0x8040 << 16) | (0x9b7c >> 2),
192         0x00000000,
193         (0x8000 << 16) | (0xe84 >> 2),
194         0x00000000,
195         (0x8040 << 16) | (0xe84 >> 2),
196         0x00000000,
197         (0x8000 << 16) | (0x89c0 >> 2),
198         0x00000000,
199         (0x8040 << 16) | (0x89c0 >> 2),
200         0x00000000,
201         (0x8000 << 16) | (0x914c >> 2),
202         0x00000000,
203         (0x8040 << 16) | (0x914c >> 2),
204         0x00000000,
205         (0x8000 << 16) | (0x8c20 >> 2),
206         0x00000000,
207         (0x8040 << 16) | (0x8c20 >> 2),
208         0x00000000,
209         (0x8000 << 16) | (0x9354 >> 2),
210         0x00000000,
211         (0x8040 << 16) | (0x9354 >> 2),
212         0x00000000,
213         (0x9c00 << 16) | (0x9060 >> 2),
214         0x00000000,
215         (0x9c00 << 16) | (0x9364 >> 2),
216         0x00000000,
217         (0x9c00 << 16) | (0x9100 >> 2),
218         0x00000000,
219         (0x9c00 << 16) | (0x913c >> 2),
220         0x00000000,
221         (0x8000 << 16) | (0x90e0 >> 2),
222         0x00000000,
223         (0x8000 << 16) | (0x90e4 >> 2),
224         0x00000000,
225         (0x8000 << 16) | (0x90e8 >> 2),
226         0x00000000,
227         (0x8040 << 16) | (0x90e0 >> 2),
228         0x00000000,
229         (0x8040 << 16) | (0x90e4 >> 2),
230         0x00000000,
231         (0x8040 << 16) | (0x90e8 >> 2),
232         0x00000000,
233         (0x9c00 << 16) | (0x8bcc >> 2),
234         0x00000000,
235         (0x9c00 << 16) | (0x8b24 >> 2),
236         0x00000000,
237         (0x9c00 << 16) | (0x88c4 >> 2),
238         0x00000000,
239         (0x9c00 << 16) | (0x8e50 >> 2),
240         0x00000000,
241         (0x9c00 << 16) | (0x8c0c >> 2),
242         0x00000000,
243         (0x9c00 << 16) | (0x8e58 >> 2),
244         0x00000000,
245         (0x9c00 << 16) | (0x8e5c >> 2),
246         0x00000000,
247         (0x9c00 << 16) | (0x9508 >> 2),
248         0x00000000,
249         (0x9c00 << 16) | (0x950c >> 2),
250         0x00000000,
251         (0x9c00 << 16) | (0x9494 >> 2),
252         0x00000000,
253         (0x9c00 << 16) | (0xac0c >> 2),
254         0x00000000,
255         (0x9c00 << 16) | (0xac10 >> 2),
256         0x00000000,
257         (0x9c00 << 16) | (0xac14 >> 2),
258         0x00000000,
259         (0x9c00 << 16) | (0xae00 >> 2),
260         0x00000000,
261         (0x9c00 << 16) | (0xac08 >> 2),
262         0x00000000,
263         (0x9c00 << 16) | (0x88d4 >> 2),
264         0x00000000,
265         (0x9c00 << 16) | (0x88c8 >> 2),
266         0x00000000,
267         (0x9c00 << 16) | (0x88cc >> 2),
268         0x00000000,
269         (0x9c00 << 16) | (0x89b0 >> 2),
270         0x00000000,
271         (0x9c00 << 16) | (0x8b10 >> 2),
272         0x00000000,
273         (0x9c00 << 16) | (0x8a14 >> 2),
274         0x00000000,
275         (0x9c00 << 16) | (0x9830 >> 2),
276         0x00000000,
277         (0x9c00 << 16) | (0x9834 >> 2),
278         0x00000000,
279         (0x9c00 << 16) | (0x9838 >> 2),
280         0x00000000,
281         (0x9c00 << 16) | (0x9a10 >> 2),
282         0x00000000,
283         (0x8000 << 16) | (0x9870 >> 2),
284         0x00000000,
285         (0x8000 << 16) | (0x9874 >> 2),
286         0x00000000,
287         (0x8001 << 16) | (0x9870 >> 2),
288         0x00000000,
289         (0x8001 << 16) | (0x9874 >> 2),
290         0x00000000,
291         (0x8040 << 16) | (0x9870 >> 2),
292         0x00000000,
293         (0x8040 << 16) | (0x9874 >> 2),
294         0x00000000,
295         (0x8041 << 16) | (0x9870 >> 2),
296         0x00000000,
297         (0x8041 << 16) | (0x9874 >> 2),
298         0x00000000,
299         0x00000000
300 };
301
/*
 * Tahiti golden (recommended) RLC register settings.  Rows appear to be
 * { offset, and-mask, or-value } triples — confirm against the helper
 * that applies these sequences (not visible in this chunk).
 */
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};
311
/*
 * Tahiti golden register settings, { offset, and-mask, or-value } rows
 * (layout inferred from grouping — confirm against the applying code).
 */
static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};
346
/* Additional Tahiti golden register row ({ offset, mask, value }). */
static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};
351
/* Pitcairn golden RLC register settings ({ offset, mask, value } rows). */
static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};
360
/* Pitcairn golden register settings ({ offset, mask, value } rows). */
static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
391
/* Verde golden RLC register settings ({ offset, mask, value } rows). */
static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};
400
/*
 * Verde golden register settings ({ offset, mask, value } rows).
 *
 * NOTE(review): several rows are repeated verbatim (0xd030/0xd830 x2,
 * 0x2ae4/0x240c/0x8a14/0x28350/0x8e88/0x8e84/0xac14/0xac10/0xac0c/
 * 0x88d4 x3, 0x9100 x2).  Duplicates are redundant for a simple
 * mask/apply loop — kept exactly as in the original; verify intent
 * before deduplicating.
 */
static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
456
/* Oland golden RLC register settings ({ offset, mask, value } rows). */
static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};
465
/* Oland golden register settings ({ offset, mask, value } rows). */
static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
496
/* Hainan golden register settings ({ offset, mask, value } rows). */
static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
525
/* Additional Hainan golden register row ({ offset, mask, value }). */
static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};
530
/*
 * Tahiti clock-gating init sequence (mgcg/cgcg presumably = medium- /
 * coarse-grain clock gating).  Rows appear to be { offset, and-mask,
 * or-value } triples — confirm against the applying helper (not
 * visible in this chunk).
 */
static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
660
/*
 * Pitcairn clock-gating init sequence ({ offset, and-mask, or-value }
 * rows, same apparent layout as tahiti_mgcg_cgcg_init — confirm
 * against the applying helper, not visible in this chunk).
 */
static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
758
759 static const u32 verde_mgcg_cgcg_init[] =
760 {
761         0xc400, 0xffffffff, 0xfffffffc,
762         0x802c, 0xffffffff, 0xe0000000,
763         0x9a60, 0xffffffff, 0x00000100,
764         0x92a4, 0xffffffff, 0x00000100,
765         0xc164, 0xffffffff, 0x00000100,
766         0x9774, 0xffffffff, 0x00000100,
767         0x8984, 0xffffffff, 0x06000100,
768         0x8a18, 0xffffffff, 0x00000100,
769         0x92a0, 0xffffffff, 0x00000100,
770         0xc380, 0xffffffff, 0x00000100,
771         0x8b28, 0xffffffff, 0x00000100,
772         0x9144, 0xffffffff, 0x00000100,
773         0x8d88, 0xffffffff, 0x00000100,
774         0x8d8c, 0xffffffff, 0x00000100,
775         0x9030, 0xffffffff, 0x00000100,
776         0x9034, 0xffffffff, 0x00000100,
777         0x9038, 0xffffffff, 0x00000100,
778         0x903c, 0xffffffff, 0x00000100,
779         0xad80, 0xffffffff, 0x00000100,
780         0xac54, 0xffffffff, 0x00000100,
781         0x897c, 0xffffffff, 0x06000100,
782         0x9868, 0xffffffff, 0x00000100,
783         0x9510, 0xffffffff, 0x00000100,
784         0xaf04, 0xffffffff, 0x00000100,
785         0xae04, 0xffffffff, 0x00000100,
786         0x949c, 0xffffffff, 0x00000100,
787         0x802c, 0xffffffff, 0xe0000000,
788         0x9160, 0xffffffff, 0x00010000,
789         0x9164, 0xffffffff, 0x00030002,
790         0x9168, 0xffffffff, 0x00040007,
791         0x916c, 0xffffffff, 0x00060005,
792         0x9170, 0xffffffff, 0x00090008,
793         0x9174, 0xffffffff, 0x00020001,
794         0x9178, 0xffffffff, 0x00040003,
795         0x917c, 0xffffffff, 0x00000007,
796         0x9180, 0xffffffff, 0x00060005,
797         0x9184, 0xffffffff, 0x00090008,
798         0x9188, 0xffffffff, 0x00030002,
799         0x918c, 0xffffffff, 0x00050004,
800         0x9190, 0xffffffff, 0x00000008,
801         0x9194, 0xffffffff, 0x00070006,
802         0x9198, 0xffffffff, 0x000a0009,
803         0x919c, 0xffffffff, 0x00040003,
804         0x91a0, 0xffffffff, 0x00060005,
805         0x91a4, 0xffffffff, 0x00000009,
806         0x91a8, 0xffffffff, 0x00080007,
807         0x91ac, 0xffffffff, 0x000b000a,
808         0x91b0, 0xffffffff, 0x00050004,
809         0x91b4, 0xffffffff, 0x00070006,
810         0x91b8, 0xffffffff, 0x0008000b,
811         0x91bc, 0xffffffff, 0x000a0009,
812         0x91c0, 0xffffffff, 0x000d000c,
813         0x9200, 0xffffffff, 0x00090008,
814         0x9204, 0xffffffff, 0x000b000a,
815         0x9208, 0xffffffff, 0x000c000f,
816         0x920c, 0xffffffff, 0x000e000d,
817         0x9210, 0xffffffff, 0x00110010,
818         0x9214, 0xffffffff, 0x000a0009,
819         0x9218, 0xffffffff, 0x000c000b,
820         0x921c, 0xffffffff, 0x0000000f,
821         0x9220, 0xffffffff, 0x000e000d,
822         0x9224, 0xffffffff, 0x00110010,
823         0x9228, 0xffffffff, 0x000b000a,
824         0x922c, 0xffffffff, 0x000d000c,
825         0x9230, 0xffffffff, 0x00000010,
826         0x9234, 0xffffffff, 0x000f000e,
827         0x9238, 0xffffffff, 0x00120011,
828         0x923c, 0xffffffff, 0x000c000b,
829         0x9240, 0xffffffff, 0x000e000d,
830         0x9244, 0xffffffff, 0x00000011,
831         0x9248, 0xffffffff, 0x0010000f,
832         0x924c, 0xffffffff, 0x00130012,
833         0x9250, 0xffffffff, 0x000d000c,
834         0x9254, 0xffffffff, 0x000f000e,
835         0x9258, 0xffffffff, 0x00100013,
836         0x925c, 0xffffffff, 0x00120011,
837         0x9260, 0xffffffff, 0x00150014,
838         0x9150, 0xffffffff, 0x96940200,
839         0x8708, 0xffffffff, 0x00900100,
840         0xc478, 0xffffffff, 0x00000080,
841         0xc404, 0xffffffff, 0x0020003f,
842         0x30, 0xffffffff, 0x0000001c,
843         0x34, 0x000f0000, 0x000f0000,
844         0x160c, 0xffffffff, 0x00000100,
845         0x1024, 0xffffffff, 0x00000100,
846         0x102c, 0x00000101, 0x00000000,
847         0x20a8, 0xffffffff, 0x00000104,
848         0x264c, 0x000c0000, 0x000c0000,
849         0x2648, 0x000c0000, 0x000c0000,
850         0x55e4, 0xff000fff, 0x00000100,
851         0x55e8, 0x00000001, 0x00000001,
852         0x2f50, 0x00000001, 0x00000001,
853         0x30cc, 0xc0000fff, 0x00000104,
854         0xc1e4, 0x00000001, 0x00000001,
855         0xd0c0, 0xfffffff0, 0x00000100,
856         0xd8c0, 0xfffffff0, 0x00000100
857 };
858
859 static const u32 oland_mgcg_cgcg_init[] =
860 {
861         0xc400, 0xffffffff, 0xfffffffc,
862         0x802c, 0xffffffff, 0xe0000000,
863         0x9a60, 0xffffffff, 0x00000100,
864         0x92a4, 0xffffffff, 0x00000100,
865         0xc164, 0xffffffff, 0x00000100,
866         0x9774, 0xffffffff, 0x00000100,
867         0x8984, 0xffffffff, 0x06000100,
868         0x8a18, 0xffffffff, 0x00000100,
869         0x92a0, 0xffffffff, 0x00000100,
870         0xc380, 0xffffffff, 0x00000100,
871         0x8b28, 0xffffffff, 0x00000100,
872         0x9144, 0xffffffff, 0x00000100,
873         0x8d88, 0xffffffff, 0x00000100,
874         0x8d8c, 0xffffffff, 0x00000100,
875         0x9030, 0xffffffff, 0x00000100,
876         0x9034, 0xffffffff, 0x00000100,
877         0x9038, 0xffffffff, 0x00000100,
878         0x903c, 0xffffffff, 0x00000100,
879         0xad80, 0xffffffff, 0x00000100,
880         0xac54, 0xffffffff, 0x00000100,
881         0x897c, 0xffffffff, 0x06000100,
882         0x9868, 0xffffffff, 0x00000100,
883         0x9510, 0xffffffff, 0x00000100,
884         0xaf04, 0xffffffff, 0x00000100,
885         0xae04, 0xffffffff, 0x00000100,
886         0x949c, 0xffffffff, 0x00000100,
887         0x802c, 0xffffffff, 0xe0000000,
888         0x9160, 0xffffffff, 0x00010000,
889         0x9164, 0xffffffff, 0x00030002,
890         0x9168, 0xffffffff, 0x00040007,
891         0x916c, 0xffffffff, 0x00060005,
892         0x9170, 0xffffffff, 0x00090008,
893         0x9174, 0xffffffff, 0x00020001,
894         0x9178, 0xffffffff, 0x00040003,
895         0x917c, 0xffffffff, 0x00000007,
896         0x9180, 0xffffffff, 0x00060005,
897         0x9184, 0xffffffff, 0x00090008,
898         0x9188, 0xffffffff, 0x00030002,
899         0x918c, 0xffffffff, 0x00050004,
900         0x9190, 0xffffffff, 0x00000008,
901         0x9194, 0xffffffff, 0x00070006,
902         0x9198, 0xffffffff, 0x000a0009,
903         0x919c, 0xffffffff, 0x00040003,
904         0x91a0, 0xffffffff, 0x00060005,
905         0x91a4, 0xffffffff, 0x00000009,
906         0x91a8, 0xffffffff, 0x00080007,
907         0x91ac, 0xffffffff, 0x000b000a,
908         0x91b0, 0xffffffff, 0x00050004,
909         0x91b4, 0xffffffff, 0x00070006,
910         0x91b8, 0xffffffff, 0x0008000b,
911         0x91bc, 0xffffffff, 0x000a0009,
912         0x91c0, 0xffffffff, 0x000d000c,
913         0x91c4, 0xffffffff, 0x00060005,
914         0x91c8, 0xffffffff, 0x00080007,
915         0x91cc, 0xffffffff, 0x0000000b,
916         0x91d0, 0xffffffff, 0x000a0009,
917         0x91d4, 0xffffffff, 0x000d000c,
918         0x9150, 0xffffffff, 0x96940200,
919         0x8708, 0xffffffff, 0x00900100,
920         0xc478, 0xffffffff, 0x00000080,
921         0xc404, 0xffffffff, 0x0020003f,
922         0x30, 0xffffffff, 0x0000001c,
923         0x34, 0x000f0000, 0x000f0000,
924         0x160c, 0xffffffff, 0x00000100,
925         0x1024, 0xffffffff, 0x00000100,
926         0x102c, 0x00000101, 0x00000000,
927         0x20a8, 0xffffffff, 0x00000104,
928         0x264c, 0x000c0000, 0x000c0000,
929         0x2648, 0x000c0000, 0x000c0000,
930         0x55e4, 0xff000fff, 0x00000100,
931         0x55e8, 0x00000001, 0x00000001,
932         0x2f50, 0x00000001, 0x00000001,
933         0x30cc, 0xc0000fff, 0x00000104,
934         0xc1e4, 0x00000001, 0x00000001,
935         0xd0c0, 0xfffffff0, 0x00000100,
936         0xd8c0, 0xfffffff0, 0x00000100
937 };
938
939 static const u32 hainan_mgcg_cgcg_init[] =
940 {
941         0xc400, 0xffffffff, 0xfffffffc,
942         0x802c, 0xffffffff, 0xe0000000,
943         0x9a60, 0xffffffff, 0x00000100,
944         0x92a4, 0xffffffff, 0x00000100,
945         0xc164, 0xffffffff, 0x00000100,
946         0x9774, 0xffffffff, 0x00000100,
947         0x8984, 0xffffffff, 0x06000100,
948         0x8a18, 0xffffffff, 0x00000100,
949         0x92a0, 0xffffffff, 0x00000100,
950         0xc380, 0xffffffff, 0x00000100,
951         0x8b28, 0xffffffff, 0x00000100,
952         0x9144, 0xffffffff, 0x00000100,
953         0x8d88, 0xffffffff, 0x00000100,
954         0x8d8c, 0xffffffff, 0x00000100,
955         0x9030, 0xffffffff, 0x00000100,
956         0x9034, 0xffffffff, 0x00000100,
957         0x9038, 0xffffffff, 0x00000100,
958         0x903c, 0xffffffff, 0x00000100,
959         0xad80, 0xffffffff, 0x00000100,
960         0xac54, 0xffffffff, 0x00000100,
961         0x897c, 0xffffffff, 0x06000100,
962         0x9868, 0xffffffff, 0x00000100,
963         0x9510, 0xffffffff, 0x00000100,
964         0xaf04, 0xffffffff, 0x00000100,
965         0xae04, 0xffffffff, 0x00000100,
966         0x949c, 0xffffffff, 0x00000100,
967         0x802c, 0xffffffff, 0xe0000000,
968         0x9160, 0xffffffff, 0x00010000,
969         0x9164, 0xffffffff, 0x00030002,
970         0x9168, 0xffffffff, 0x00040007,
971         0x916c, 0xffffffff, 0x00060005,
972         0x9170, 0xffffffff, 0x00090008,
973         0x9174, 0xffffffff, 0x00020001,
974         0x9178, 0xffffffff, 0x00040003,
975         0x917c, 0xffffffff, 0x00000007,
976         0x9180, 0xffffffff, 0x00060005,
977         0x9184, 0xffffffff, 0x00090008,
978         0x9188, 0xffffffff, 0x00030002,
979         0x918c, 0xffffffff, 0x00050004,
980         0x9190, 0xffffffff, 0x00000008,
981         0x9194, 0xffffffff, 0x00070006,
982         0x9198, 0xffffffff, 0x000a0009,
983         0x919c, 0xffffffff, 0x00040003,
984         0x91a0, 0xffffffff, 0x00060005,
985         0x91a4, 0xffffffff, 0x00000009,
986         0x91a8, 0xffffffff, 0x00080007,
987         0x91ac, 0xffffffff, 0x000b000a,
988         0x91b0, 0xffffffff, 0x00050004,
989         0x91b4, 0xffffffff, 0x00070006,
990         0x91b8, 0xffffffff, 0x0008000b,
991         0x91bc, 0xffffffff, 0x000a0009,
992         0x91c0, 0xffffffff, 0x000d000c,
993         0x91c4, 0xffffffff, 0x00060005,
994         0x91c8, 0xffffffff, 0x00080007,
995         0x91cc, 0xffffffff, 0x0000000b,
996         0x91d0, 0xffffffff, 0x000a0009,
997         0x91d4, 0xffffffff, 0x000d000c,
998         0x9150, 0xffffffff, 0x96940200,
999         0x8708, 0xffffffff, 0x00900100,
1000         0xc478, 0xffffffff, 0x00000080,
1001         0xc404, 0xffffffff, 0x0020003f,
1002         0x30, 0xffffffff, 0x0000001c,
1003         0x34, 0x000f0000, 0x000f0000,
1004         0x160c, 0xffffffff, 0x00000100,
1005         0x1024, 0xffffffff, 0x00000100,
1006         0x20a8, 0xffffffff, 0x00000104,
1007         0x264c, 0x000c0000, 0x000c0000,
1008         0x2648, 0x000c0000, 0x000c0000,
1009         0x2f50, 0x00000001, 0x00000001,
1010         0x30cc, 0xc0000fff, 0x00000104,
1011         0xc1e4, 0x00000001, 0x00000001,
1012         0xd0c0, 0xfffffff0, 0x00000100,
1013         0xd8c0, 0xfffffff0, 0x00000100
1014 };
1015
1016 static u32 verde_pg_init[] =
1017 {
1018         0x353c, 0xffffffff, 0x40000,
1019         0x3538, 0xffffffff, 0x200010ff,
1020         0x353c, 0xffffffff, 0x0,
1021         0x353c, 0xffffffff, 0x0,
1022         0x353c, 0xffffffff, 0x0,
1023         0x353c, 0xffffffff, 0x0,
1024         0x353c, 0xffffffff, 0x0,
1025         0x353c, 0xffffffff, 0x7007,
1026         0x3538, 0xffffffff, 0x300010ff,
1027         0x353c, 0xffffffff, 0x0,
1028         0x353c, 0xffffffff, 0x0,
1029         0x353c, 0xffffffff, 0x0,
1030         0x353c, 0xffffffff, 0x0,
1031         0x353c, 0xffffffff, 0x0,
1032         0x353c, 0xffffffff, 0x400000,
1033         0x3538, 0xffffffff, 0x100010ff,
1034         0x353c, 0xffffffff, 0x0,
1035         0x353c, 0xffffffff, 0x0,
1036         0x353c, 0xffffffff, 0x0,
1037         0x353c, 0xffffffff, 0x0,
1038         0x353c, 0xffffffff, 0x0,
1039         0x353c, 0xffffffff, 0x120200,
1040         0x3538, 0xffffffff, 0x500010ff,
1041         0x353c, 0xffffffff, 0x0,
1042         0x353c, 0xffffffff, 0x0,
1043         0x353c, 0xffffffff, 0x0,
1044         0x353c, 0xffffffff, 0x0,
1045         0x353c, 0xffffffff, 0x0,
1046         0x353c, 0xffffffff, 0x1e1e16,
1047         0x3538, 0xffffffff, 0x600010ff,
1048         0x353c, 0xffffffff, 0x0,
1049         0x353c, 0xffffffff, 0x0,
1050         0x353c, 0xffffffff, 0x0,
1051         0x353c, 0xffffffff, 0x0,
1052         0x353c, 0xffffffff, 0x0,
1053         0x353c, 0xffffffff, 0x171f1e,
1054         0x3538, 0xffffffff, 0x700010ff,
1055         0x353c, 0xffffffff, 0x0,
1056         0x353c, 0xffffffff, 0x0,
1057         0x353c, 0xffffffff, 0x0,
1058         0x353c, 0xffffffff, 0x0,
1059         0x353c, 0xffffffff, 0x0,
1060         0x353c, 0xffffffff, 0x0,
1061         0x3538, 0xffffffff, 0x9ff,
1062         0x3500, 0xffffffff, 0x0,
1063         0x3504, 0xffffffff, 0x10000800,
1064         0x3504, 0xffffffff, 0xf,
1065         0x3504, 0xffffffff, 0xf,
1066         0x3500, 0xffffffff, 0x4,
1067         0x3504, 0xffffffff, 0x1000051e,
1068         0x3504, 0xffffffff, 0xffff,
1069         0x3504, 0xffffffff, 0xffff,
1070         0x3500, 0xffffffff, 0x8,
1071         0x3504, 0xffffffff, 0x80500,
1072         0x3500, 0xffffffff, 0x12,
1073         0x3504, 0xffffffff, 0x9050c,
1074         0x3500, 0xffffffff, 0x1d,
1075         0x3504, 0xffffffff, 0xb052c,
1076         0x3500, 0xffffffff, 0x2a,
1077         0x3504, 0xffffffff, 0x1053e,
1078         0x3500, 0xffffffff, 0x2d,
1079         0x3504, 0xffffffff, 0x10546,
1080         0x3500, 0xffffffff, 0x30,
1081         0x3504, 0xffffffff, 0xa054e,
1082         0x3500, 0xffffffff, 0x3c,
1083         0x3504, 0xffffffff, 0x1055f,
1084         0x3500, 0xffffffff, 0x3f,
1085         0x3504, 0xffffffff, 0x10567,
1086         0x3500, 0xffffffff, 0x42,
1087         0x3504, 0xffffffff, 0x1056f,
1088         0x3500, 0xffffffff, 0x45,
1089         0x3504, 0xffffffff, 0x10572,
1090         0x3500, 0xffffffff, 0x48,
1091         0x3504, 0xffffffff, 0x20575,
1092         0x3500, 0xffffffff, 0x4c,
1093         0x3504, 0xffffffff, 0x190801,
1094         0x3500, 0xffffffff, 0x67,
1095         0x3504, 0xffffffff, 0x1082a,
1096         0x3500, 0xffffffff, 0x6a,
1097         0x3504, 0xffffffff, 0x1b082d,
1098         0x3500, 0xffffffff, 0x87,
1099         0x3504, 0xffffffff, 0x310851,
1100         0x3500, 0xffffffff, 0xba,
1101         0x3504, 0xffffffff, 0x891,
1102         0x3500, 0xffffffff, 0xbc,
1103         0x3504, 0xffffffff, 0x893,
1104         0x3500, 0xffffffff, 0xbe,
1105         0x3504, 0xffffffff, 0x20895,
1106         0x3500, 0xffffffff, 0xc2,
1107         0x3504, 0xffffffff, 0x20899,
1108         0x3500, 0xffffffff, 0xc6,
1109         0x3504, 0xffffffff, 0x2089d,
1110         0x3500, 0xffffffff, 0xca,
1111         0x3504, 0xffffffff, 0x8a1,
1112         0x3500, 0xffffffff, 0xcc,
1113         0x3504, 0xffffffff, 0x8a3,
1114         0x3500, 0xffffffff, 0xce,
1115         0x3504, 0xffffffff, 0x308a5,
1116         0x3500, 0xffffffff, 0xd3,
1117         0x3504, 0xffffffff, 0x6d08cd,
1118         0x3500, 0xffffffff, 0x142,
1119         0x3504, 0xffffffff, 0x2000095a,
1120         0x3504, 0xffffffff, 0x1,
1121         0x3500, 0xffffffff, 0x144,
1122         0x3504, 0xffffffff, 0x301f095b,
1123         0x3500, 0xffffffff, 0x165,
1124         0x3504, 0xffffffff, 0xc094d,
1125         0x3500, 0xffffffff, 0x173,
1126         0x3504, 0xffffffff, 0xf096d,
1127         0x3500, 0xffffffff, 0x184,
1128         0x3504, 0xffffffff, 0x15097f,
1129         0x3500, 0xffffffff, 0x19b,
1130         0x3504, 0xffffffff, 0xc0998,
1131         0x3500, 0xffffffff, 0x1a9,
1132         0x3504, 0xffffffff, 0x409a7,
1133         0x3500, 0xffffffff, 0x1af,
1134         0x3504, 0xffffffff, 0xcdc,
1135         0x3500, 0xffffffff, 0x1b1,
1136         0x3504, 0xffffffff, 0x800,
1137         0x3508, 0xffffffff, 0x6c9b2000,
1138         0x3510, 0xfc00, 0x2000,
1139         0x3544, 0xffffffff, 0xfc0,
1140         0x28d4, 0x00000100, 0x100
1141 };
1142
1143 static void si_init_golden_registers(struct radeon_device *rdev)
1144 {
1145         switch (rdev->family) {
1146         case CHIP_TAHITI:
1147                 radeon_program_register_sequence(rdev,
1148                                                  tahiti_golden_registers,
1149                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
1150                 radeon_program_register_sequence(rdev,
1151                                                  tahiti_golden_rlc_registers,
1152                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1153                 radeon_program_register_sequence(rdev,
1154                                                  tahiti_mgcg_cgcg_init,
1155                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1156                 radeon_program_register_sequence(rdev,
1157                                                  tahiti_golden_registers2,
1158                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1159                 break;
1160         case CHIP_PITCAIRN:
1161                 radeon_program_register_sequence(rdev,
1162                                                  pitcairn_golden_registers,
1163                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1164                 radeon_program_register_sequence(rdev,
1165                                                  pitcairn_golden_rlc_registers,
1166                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1167                 radeon_program_register_sequence(rdev,
1168                                                  pitcairn_mgcg_cgcg_init,
1169                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1170                 break;
1171         case CHIP_VERDE:
1172                 radeon_program_register_sequence(rdev,
1173                                                  verde_golden_registers,
1174                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
1175                 radeon_program_register_sequence(rdev,
1176                                                  verde_golden_rlc_registers,
1177                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1178                 radeon_program_register_sequence(rdev,
1179                                                  verde_mgcg_cgcg_init,
1180                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1181                 radeon_program_register_sequence(rdev,
1182                                                  verde_pg_init,
1183                                                  (const u32)ARRAY_SIZE(verde_pg_init));
1184                 break;
1185         case CHIP_OLAND:
1186                 radeon_program_register_sequence(rdev,
1187                                                  oland_golden_registers,
1188                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
1189                 radeon_program_register_sequence(rdev,
1190                                                  oland_golden_rlc_registers,
1191                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1192                 radeon_program_register_sequence(rdev,
1193                                                  oland_mgcg_cgcg_init,
1194                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1195                 break;
1196         case CHIP_HAINAN:
1197                 radeon_program_register_sequence(rdev,
1198                                                  hainan_golden_registers,
1199                                                  (const u32)ARRAY_SIZE(hainan_golden_registers));
1200                 radeon_program_register_sequence(rdev,
1201                                                  hainan_golden_registers2,
1202                                                  (const u32)ARRAY_SIZE(hainan_golden_registers2));
1203                 radeon_program_register_sequence(rdev,
1204                                                  hainan_mgcg_cgcg_init,
1205                                                  (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1206                 break;
1207         default:
1208                 break;
1209         }
1210 }
1211
1212 #define PCIE_BUS_CLK                10000
1213 #define TCLK                        (PCIE_BUS_CLK / 10)
1214
1215 /**
1216  * si_get_xclk - get the xclk
1217  *
1218  * @rdev: radeon_device pointer
1219  *
1220  * Returns the reference clock used by the gfx engine
1221  * (SI).
1222  */
1223 u32 si_get_xclk(struct radeon_device *rdev)
1224 {
1225         u32 reference_clock = rdev->clock.spll.reference_freq;
1226         u32 tmp;
1227
1228         tmp = RREG32(CG_CLKPIN_CNTL_2);
1229         if (tmp & MUX_TCLK_TO_XCLK)
1230                 return TCLK;
1231
1232         tmp = RREG32(CG_CLKPIN_CNTL);
1233         if (tmp & XTALIN_DIVIDE)
1234                 return reference_clock / 4;
1235
1236         return reference_clock;
1237 }
1238
1239 /* get temperature in millidegrees */
1240 int si_get_temp(struct radeon_device *rdev)
1241 {
1242         u32 temp;
1243         int actual_temp = 0;
1244
1245         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1246                 CTF_TEMP_SHIFT;
1247
1248         if (temp & 0x200)
1249                 actual_temp = 255;
1250         else
1251                 actual_temp = temp & 0x1ff;
1252
1253         actual_temp = (actual_temp * 1000);
1254
1255         return actual_temp;
1256 }
1257
1258 #define TAHITI_IO_MC_REGS_SIZE 36
1259
1260 static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1261         {0x0000006f, 0x03044000},
1262         {0x00000070, 0x0480c018},
1263         {0x00000071, 0x00000040},
1264         {0x00000072, 0x01000000},
1265         {0x00000074, 0x000000ff},
1266         {0x00000075, 0x00143400},
1267         {0x00000076, 0x08ec0800},
1268         {0x00000077, 0x040000cc},
1269         {0x00000079, 0x00000000},
1270         {0x0000007a, 0x21000409},
1271         {0x0000007c, 0x00000000},
1272         {0x0000007d, 0xe8000000},
1273         {0x0000007e, 0x044408a8},
1274         {0x0000007f, 0x00000003},
1275         {0x00000080, 0x00000000},
1276         {0x00000081, 0x01000000},
1277         {0x00000082, 0x02000000},
1278         {0x00000083, 0x00000000},
1279         {0x00000084, 0xe3f3e4f4},
1280         {0x00000085, 0x00052024},
1281         {0x00000087, 0x00000000},
1282         {0x00000088, 0x66036603},
1283         {0x00000089, 0x01000000},
1284         {0x0000008b, 0x1c0a0000},
1285         {0x0000008c, 0xff010000},
1286         {0x0000008e, 0xffffefff},
1287         {0x0000008f, 0xfff3efff},
1288         {0x00000090, 0xfff3efbf},
1289         {0x00000094, 0x00101101},
1290         {0x00000095, 0x00000fff},
1291         {0x00000096, 0x00116fff},
1292         {0x00000097, 0x60010000},
1293         {0x00000098, 0x10010000},
1294         {0x00000099, 0x00006000},
1295         {0x0000009a, 0x00001000},
1296         {0x0000009f, 0x00a77400}
1297 };
1298
1299 static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1300         {0x0000006f, 0x03044000},
1301         {0x00000070, 0x0480c018},
1302         {0x00000071, 0x00000040},
1303         {0x00000072, 0x01000000},
1304         {0x00000074, 0x000000ff},
1305         {0x00000075, 0x00143400},
1306         {0x00000076, 0x08ec0800},
1307         {0x00000077, 0x040000cc},
1308         {0x00000079, 0x00000000},
1309         {0x0000007a, 0x21000409},
1310         {0x0000007c, 0x00000000},
1311         {0x0000007d, 0xe8000000},
1312         {0x0000007e, 0x044408a8},
1313         {0x0000007f, 0x00000003},
1314         {0x00000080, 0x00000000},
1315         {0x00000081, 0x01000000},
1316         {0x00000082, 0x02000000},
1317         {0x00000083, 0x00000000},
1318         {0x00000084, 0xe3f3e4f4},
1319         {0x00000085, 0x00052024},
1320         {0x00000087, 0x00000000},
1321         {0x00000088, 0x66036603},
1322         {0x00000089, 0x01000000},
1323         {0x0000008b, 0x1c0a0000},
1324         {0x0000008c, 0xff010000},
1325         {0x0000008e, 0xffffefff},
1326         {0x0000008f, 0xfff3efff},
1327         {0x00000090, 0xfff3efbf},
1328         {0x00000094, 0x00101101},
1329         {0x00000095, 0x00000fff},
1330         {0x00000096, 0x00116fff},
1331         {0x00000097, 0x60010000},
1332         {0x00000098, 0x10010000},
1333         {0x00000099, 0x00006000},
1334         {0x0000009a, 0x00001000},
1335         {0x0000009f, 0x00a47400}
1336 };
1337
1338 static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1339         {0x0000006f, 0x03044000},
1340         {0x00000070, 0x0480c018},
1341         {0x00000071, 0x00000040},
1342         {0x00000072, 0x01000000},
1343         {0x00000074, 0x000000ff},
1344         {0x00000075, 0x00143400},
1345         {0x00000076, 0x08ec0800},
1346         {0x00000077, 0x040000cc},
1347         {0x00000079, 0x00000000},
1348         {0x0000007a, 0x21000409},
1349         {0x0000007c, 0x00000000},
1350         {0x0000007d, 0xe8000000},
1351         {0x0000007e, 0x044408a8},
1352         {0x0000007f, 0x00000003},
1353         {0x00000080, 0x00000000},
1354         {0x00000081, 0x01000000},
1355         {0x00000082, 0x02000000},
1356         {0x00000083, 0x00000000},
1357         {0x00000084, 0xe3f3e4f4},
1358         {0x00000085, 0x00052024},
1359         {0x00000087, 0x00000000},
1360         {0x00000088, 0x66036603},
1361         {0x00000089, 0x01000000},
1362         {0x0000008b, 0x1c0a0000},
1363         {0x0000008c, 0xff010000},
1364         {0x0000008e, 0xffffefff},
1365         {0x0000008f, 0xfff3efff},
1366         {0x00000090, 0xfff3efbf},
1367         {0x00000094, 0x00101101},
1368         {0x00000095, 0x00000fff},
1369         {0x00000096, 0x00116fff},
1370         {0x00000097, 0x60010000},
1371         {0x00000098, 0x10010000},
1372         {0x00000099, 0x00006000},
1373         {0x0000009a, 0x00001000},
1374         {0x0000009f, 0x00a37400}
1375 };
1376
1377 static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1378         {0x0000006f, 0x03044000},
1379         {0x00000070, 0x0480c018},
1380         {0x00000071, 0x00000040},
1381         {0x00000072, 0x01000000},
1382         {0x00000074, 0x000000ff},
1383         {0x00000075, 0x00143400},
1384         {0x00000076, 0x08ec0800},
1385         {0x00000077, 0x040000cc},
1386         {0x00000079, 0x00000000},
1387         {0x0000007a, 0x21000409},
1388         {0x0000007c, 0x00000000},
1389         {0x0000007d, 0xe8000000},
1390         {0x0000007e, 0x044408a8},
1391         {0x0000007f, 0x00000003},
1392         {0x00000080, 0x00000000},
1393         {0x00000081, 0x01000000},
1394         {0x00000082, 0x02000000},
1395         {0x00000083, 0x00000000},
1396         {0x00000084, 0xe3f3e4f4},
1397         {0x00000085, 0x00052024},
1398         {0x00000087, 0x00000000},
1399         {0x00000088, 0x66036603},
1400         {0x00000089, 0x01000000},
1401         {0x0000008b, 0x1c0a0000},
1402         {0x0000008c, 0xff010000},
1403         {0x0000008e, 0xffffefff},
1404         {0x0000008f, 0xfff3efff},
1405         {0x00000090, 0xfff3efbf},
1406         {0x00000094, 0x00101101},
1407         {0x00000095, 0x00000fff},
1408         {0x00000096, 0x00116fff},
1409         {0x00000097, 0x60010000},
1410         {0x00000098, 0x10010000},
1411         {0x00000099, 0x00006000},
1412         {0x0000009a, 0x00001000},
1413         {0x0000009f, 0x00a17730}
1414 };
1415
1416 static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1417         {0x0000006f, 0x03044000},
1418         {0x00000070, 0x0480c018},
1419         {0x00000071, 0x00000040},
1420         {0x00000072, 0x01000000},
1421         {0x00000074, 0x000000ff},
1422         {0x00000075, 0x00143400},
1423         {0x00000076, 0x08ec0800},
1424         {0x00000077, 0x040000cc},
1425         {0x00000079, 0x00000000},
1426         {0x0000007a, 0x21000409},
1427         {0x0000007c, 0x00000000},
1428         {0x0000007d, 0xe8000000},
1429         {0x0000007e, 0x044408a8},
1430         {0x0000007f, 0x00000003},
1431         {0x00000080, 0x00000000},
1432         {0x00000081, 0x01000000},
1433         {0x00000082, 0x02000000},
1434         {0x00000083, 0x00000000},
1435         {0x00000084, 0xe3f3e4f4},
1436         {0x00000085, 0x00052024},
1437         {0x00000087, 0x00000000},
1438         {0x00000088, 0x66036603},
1439         {0x00000089, 0x01000000},
1440         {0x0000008b, 0x1c0a0000},
1441         {0x0000008c, 0xff010000},
1442         {0x0000008e, 0xffffefff},
1443         {0x0000008f, 0xfff3efff},
1444         {0x00000090, 0xfff3efbf},
1445         {0x00000094, 0x00101101},
1446         {0x00000095, 0x00000fff},
1447         {0x00000096, 0x00116fff},
1448         {0x00000097, 0x60010000},
1449         {0x00000098, 0x10010000},
1450         {0x00000099, 0x00006000},
1451         {0x0000009a, 0x00001000},
1452         {0x0000009f, 0x00a07730}
1453 };
1454
1455 /* ucode loading */
1456 static int si_mc_load_microcode(struct radeon_device *rdev)
1457 {
1458         const __be32 *fw_data;
1459         u32 running, blackout = 0;
1460         u32 *io_mc_regs;
1461         int i, ucode_size, regs_size;
1462
1463         if (!rdev->mc_fw)
1464                 return -EINVAL;
1465
1466         switch (rdev->family) {
1467         case CHIP_TAHITI:
1468                 io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1469                 ucode_size = SI_MC_UCODE_SIZE;
1470                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1471                 break;
1472         case CHIP_PITCAIRN:
1473                 io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1474                 ucode_size = SI_MC_UCODE_SIZE;
1475                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1476                 break;
1477         case CHIP_VERDE:
1478         default:
1479                 io_mc_regs = (u32 *)&verde_io_mc_regs;
1480                 ucode_size = SI_MC_UCODE_SIZE;
1481                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1482                 break;
1483         case CHIP_OLAND:
1484                 io_mc_regs = (u32 *)&oland_io_mc_regs;
1485                 ucode_size = OLAND_MC_UCODE_SIZE;
1486                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1487                 break;
1488         case CHIP_HAINAN:
1489                 io_mc_regs = (u32 *)&hainan_io_mc_regs;
1490                 ucode_size = OLAND_MC_UCODE_SIZE;
1491                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1492                 break;
1493         }
1494
1495         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1496
1497         if (running == 0) {
1498                 if (running) {
1499                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1500                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1501                 }
1502
1503                 /* reset the engine and set to writable */
1504                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1505                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1506
1507                 /* load mc io regs */
1508                 for (i = 0; i < regs_size; i++) {
1509                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1510                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1511                 }
1512                 /* load the MC ucode */
1513                 fw_data = (const __be32 *)rdev->mc_fw->data;
1514                 for (i = 0; i < ucode_size; i++)
1515                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1516
1517                 /* put the engine back into the active state */
1518                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1519                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1520                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1521
1522                 /* wait for training to complete */
1523                 for (i = 0; i < rdev->usec_timeout; i++) {
1524                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1525                                 break;
1526                         udelay(1);
1527                 }
1528                 for (i = 0; i < rdev->usec_timeout; i++) {
1529                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1530                                 break;
1531                         udelay(1);
1532                 }
1533
1534                 if (running)
1535                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1536         }
1537
1538         return 0;
1539 }
1540
1541 static int si_init_microcode(struct radeon_device *rdev)
1542 {
1543         struct platform_device *pdev;
1544         const char *chip_name;
1545         const char *rlc_chip_name;
1546         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1547         char fw_name[30];
1548         int err;
1549
1550         DRM_DEBUG("\n");
1551
1552         pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
1553         err = IS_ERR(pdev);
1554         if (err) {
1555                 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
1556                 return -EINVAL;
1557         }
1558
1559         switch (rdev->family) {
1560         case CHIP_TAHITI:
1561                 chip_name = "TAHITI";
1562                 rlc_chip_name = "TAHITI";
1563                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1564                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1565                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1566                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1567                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1568                 break;
1569         case CHIP_PITCAIRN:
1570                 chip_name = "PITCAIRN";
1571                 rlc_chip_name = "PITCAIRN";
1572                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1573                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1574                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1575                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1576                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1577                 break;
1578         case CHIP_VERDE:
1579                 chip_name = "VERDE";
1580                 rlc_chip_name = "VERDE";
1581                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1582                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1583                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1584                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1585                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1586                 break;
1587         case CHIP_OLAND:
1588                 chip_name = "OLAND";
1589                 rlc_chip_name = "OLAND";
1590                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1591                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1592                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1593                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1594                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1595                 break;
1596         case CHIP_HAINAN:
1597                 chip_name = "HAINAN";
1598                 rlc_chip_name = "HAINAN";
1599                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1600                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1601                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1602                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1603                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1604                 break;
1605         default: BUG();
1606         }
1607
1608         DRM_INFO("Loading %s Microcode\n", chip_name);
1609
1610         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1611         err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
1612         if (err)
1613                 goto out;
1614         if (rdev->pfp_fw->size != pfp_req_size) {
1615                 printk(KERN_ERR
1616                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1617                        rdev->pfp_fw->size, fw_name);
1618                 err = -EINVAL;
1619                 goto out;
1620         }
1621
1622         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1623         err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
1624         if (err)
1625                 goto out;
1626         if (rdev->me_fw->size != me_req_size) {
1627                 printk(KERN_ERR
1628                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1629                        rdev->me_fw->size, fw_name);
1630                 err = -EINVAL;
1631         }
1632
1633         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1634         err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
1635         if (err)
1636                 goto out;
1637         if (rdev->ce_fw->size != ce_req_size) {
1638                 printk(KERN_ERR
1639                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1640                        rdev->ce_fw->size, fw_name);
1641                 err = -EINVAL;
1642         }
1643
1644         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1645         err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
1646         if (err)
1647                 goto out;
1648         if (rdev->rlc_fw->size != rlc_req_size) {
1649                 printk(KERN_ERR
1650                        "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1651                        rdev->rlc_fw->size, fw_name);
1652                 err = -EINVAL;
1653         }
1654
1655         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1656         err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
1657         if (err)
1658                 goto out;
1659         if (rdev->mc_fw->size != mc_req_size) {
1660                 printk(KERN_ERR
1661                        "si_mc: Bogus length %zu in firmware \"%s\"\n",
1662                        rdev->mc_fw->size, fw_name);
1663                 err = -EINVAL;
1664         }
1665
1666 out:
1667         platform_device_unregister(pdev);
1668
1669         if (err) {
1670                 if (err != -EINVAL)
1671                         printk(KERN_ERR
1672                                "si_cp: Failed to load firmware \"%s\"\n",
1673                                fw_name);
1674                 release_firmware(rdev->pfp_fw);
1675                 rdev->pfp_fw = NULL;
1676                 release_firmware(rdev->me_fw);
1677                 rdev->me_fw = NULL;
1678                 release_firmware(rdev->ce_fw);
1679                 rdev->ce_fw = NULL;
1680                 release_firmware(rdev->rlc_fw);
1681                 rdev->rlc_fw = NULL;
1682                 release_firmware(rdev->mc_fw);
1683                 rdev->mc_fw = NULL;
1684         }
1685         return err;
1686 }
1687
1688 /* watermark setup */
1689 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1690                                    struct radeon_crtc *radeon_crtc,
1691                                    struct drm_display_mode *mode,
1692                                    struct drm_display_mode *other_mode)
1693 {
1694         u32 tmp;
1695         /*
1696          * Line Buffer Setup
1697          * There are 3 line buffers, each one shared by 2 display controllers.
1698          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1699          * the display controllers.  The paritioning is done via one of four
1700          * preset allocations specified in bits 21:20:
1701          *  0 - half lb
1702          *  2 - whole lb, other crtc must be disabled
1703          */
1704         /* this can get tricky if we have two large displays on a paired group
1705          * of crtcs.  Ideally for multiple large displays we'd assign them to
1706          * non-linked crtcs for maximum line buffer allocation.
1707          */
1708         if (radeon_crtc->base.enabled && mode) {
1709                 if (other_mode)
1710                         tmp = 0; /* 1/2 */
1711                 else
1712                         tmp = 2; /* whole */
1713         } else
1714                 tmp = 0;
1715
1716         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1717                DC_LB_MEMORY_CONFIG(tmp));
1718
1719         if (radeon_crtc->base.enabled && mode) {
1720                 switch (tmp) {
1721                 case 0:
1722                 default:
1723                         return 4096 * 2;
1724                 case 2:
1725                         return 8192 * 2;
1726                 }
1727         }
1728
1729         /* controller not enabled, so no lb used */
1730         return 0;
1731 }
1732
1733 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1734 {
1735         u32 tmp = RREG32(MC_SHARED_CHMAP);
1736
1737         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1738         case 0:
1739         default:
1740                 return 1;
1741         case 1:
1742                 return 2;
1743         case 2:
1744                 return 4;
1745         case 3:
1746                 return 8;
1747         case 4:
1748                 return 3;
1749         case 5:
1750                 return 6;
1751         case 6:
1752                 return 10;
1753         case 7:
1754                 return 12;
1755         case 8:
1756                 return 16;
1757         }
1758 }
1759
/* Per-head inputs for the DCE6 display watermark calculations below. */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width in pixels */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer bytes allocated to this pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1775
1776 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1777 {
1778         /* Calculate raw DRAM Bandwidth */
1779         fixed20_12 dram_efficiency; /* 0.7 */
1780         fixed20_12 yclk, dram_channels, bandwidth;
1781         fixed20_12 a;
1782
1783         a.full = dfixed_const(1000);
1784         yclk.full = dfixed_const(wm->yclk);
1785         yclk.full = dfixed_div(yclk, a);
1786         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1787         a.full = dfixed_const(10);
1788         dram_efficiency.full = dfixed_const(7);
1789         dram_efficiency.full = dfixed_div(dram_efficiency, a);
1790         bandwidth.full = dfixed_mul(dram_channels, yclk);
1791         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1792
1793         return dfixed_trunc(bandwidth);
1794 }
1795
1796 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1797 {
1798         /* Calculate DRAM Bandwidth and the part allocated to display. */
1799         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1800         fixed20_12 yclk, dram_channels, bandwidth;
1801         fixed20_12 a;
1802
1803         a.full = dfixed_const(1000);
1804         yclk.full = dfixed_const(wm->yclk);
1805         yclk.full = dfixed_div(yclk, a);
1806         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1807         a.full = dfixed_const(10);
1808         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1809         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1810         bandwidth.full = dfixed_mul(dram_channels, yclk);
1811         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1812
1813         return dfixed_trunc(bandwidth);
1814 }
1815
1816 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1817 {
1818         /* Calculate the display Data return Bandwidth */
1819         fixed20_12 return_efficiency; /* 0.8 */
1820         fixed20_12 sclk, bandwidth;
1821         fixed20_12 a;
1822
1823         a.full = dfixed_const(1000);
1824         sclk.full = dfixed_const(wm->sclk);
1825         sclk.full = dfixed_div(sclk, a);
1826         a.full = dfixed_const(10);
1827         return_efficiency.full = dfixed_const(8);
1828         return_efficiency.full = dfixed_div(return_efficiency, a);
1829         a.full = dfixed_const(32);
1830         bandwidth.full = dfixed_mul(a, sclk);
1831         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1832
1833         return dfixed_trunc(bandwidth);
1834 }
1835
/* DMIF requests are a fixed 32 bytes on DCE6. */
static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
{
	return 32;
}
1840
1841 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1842 {
1843         /* Calculate the DMIF Request Bandwidth */
1844         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1845         fixed20_12 disp_clk, sclk, bandwidth;
1846         fixed20_12 a, b1, b2;
1847         u32 min_bandwidth;
1848
1849         a.full = dfixed_const(1000);
1850         disp_clk.full = dfixed_const(wm->disp_clk);
1851         disp_clk.full = dfixed_div(disp_clk, a);
1852         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1853         b1.full = dfixed_mul(a, disp_clk);
1854
1855         a.full = dfixed_const(1000);
1856         sclk.full = dfixed_const(wm->sclk);
1857         sclk.full = dfixed_div(sclk, a);
1858         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1859         b2.full = dfixed_mul(a, sclk);
1860
1861         a.full = dfixed_const(10);
1862         disp_clk_request_efficiency.full = dfixed_const(8);
1863         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1864
1865         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1866
1867         a.full = dfixed_const(min_bandwidth);
1868         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1869
1870         return dfixed_trunc(bandwidth);
1871 }
1872
1873 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1874 {
1875         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1876         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1877         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1878         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1879
1880         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1881 }
1882
1883 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1884 {
1885         /* Calculate the display mode Average Bandwidth
1886          * DisplayMode should contain the source and destination dimensions,
1887          * timing, etc.
1888          */
1889         fixed20_12 bpp;
1890         fixed20_12 line_time;
1891         fixed20_12 src_width;
1892         fixed20_12 bandwidth;
1893         fixed20_12 a;
1894
1895         a.full = dfixed_const(1000);
1896         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1897         line_time.full = dfixed_div(line_time, a);
1898         bpp.full = dfixed_const(wm->bytes_per_pixel);
1899         src_width.full = dfixed_const(wm->src_width);
1900         bandwidth.full = dfixed_mul(src_width, bpp);
1901         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1902         bandwidth.full = dfixed_div(bandwidth, line_time);
1903
1904         return dfixed_trunc(bandwidth);
1905 }
1906
/**
 * dce6_latency_watermark - worst-case latency (ns) to hide for this head
 * @wm: watermark parameters
 *
 * Combines memory controller latency, the time other heads spend
 * returning data, and display pipe latency; if the line buffer fill
 * rate cannot keep up with the active display time, the shortfall is
 * added on top.
 */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* heavy downscaling or many vertical taps require more source
	 * lines per destination line
	 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif buffer drain rate while hiding mc latency */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* the line buffer fill rate is also bounded by
	 * disp_clk * bytes_per_pixel
	 */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time (ns) to fetch the source lines for one destination line */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
1969
1970 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1971 {
1972         if (dce6_average_bandwidth(wm) <=
1973             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
1974                 return true;
1975         else
1976                 return false;
1977 };
1978
1979 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
1980 {
1981         if (dce6_average_bandwidth(wm) <=
1982             (dce6_available_bandwidth(wm) / wm->num_heads))
1983                 return true;
1984         else
1985                 return false;
1986 };
1987
1988 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
1989 {
1990         u32 lb_partitions = wm->lb_size / wm->src_width;
1991         u32 line_time = wm->active_time + wm->blank_time;
1992         u32 latency_tolerant_lines;
1993         u32 latency_hiding;
1994         fixed20_12 a;
1995
1996         a.full = dfixed_const(1);
1997         if (wm->vsc.full > a.full)
1998                 latency_tolerant_lines = 1;
1999         else {
2000                 if (lb_partitions <= (wm->vtaps + 1))
2001                         latency_tolerant_lines = 1;
2002                 else
2003                         latency_tolerant_lines = 2;
2004         }
2005
2006         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2007
2008         if (dce6_latency_watermark(wm) <= latency_hiding)
2009                 return true;
2010         else
2011                 return false;
2012 }
2013
/**
 * dce6_program_watermarks - program display watermarks for one crtc
 * @rdev: radeon_device pointer
 * @radeon_crtc: the crtc to program
 * @lb_size: line buffer bytes allocated to this crtc
 * @num_heads: number of active crtcs
 *
 * Builds watermark parameter sets for the high and low clock states,
 * computes the latency watermarks and priority marks, then programs
 * the DPG arbitration and PRIORITY_A/B registers for this crtc.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A: latency_watermark_a scaled by pixel clock
		 * and horizontal scale ratio, in units of 16 pixels
		 */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same calculation with watermark b */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

}
2169
2170 void dce6_bandwidth_update(struct radeon_device *rdev)
2171 {
2172         struct drm_display_mode *mode0 = NULL;
2173         struct drm_display_mode *mode1 = NULL;
2174         u32 num_heads = 0, lb_size;
2175         int i;
2176
2177         radeon_update_display_priority(rdev);
2178
2179         for (i = 0; i < rdev->num_crtc; i++) {
2180                 if (rdev->mode_info.crtcs[i]->base.enabled)
2181                         num_heads++;
2182         }
2183         for (i = 0; i < rdev->num_crtc; i += 2) {
2184                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2185                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2186                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2187                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2188                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2189                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2190         }
2191 }
2192
2193 /*
2194  * Core functions
2195  */
2196 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2197 {
2198         const u32 num_tile_mode_states = 32;
2199         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2200
2201         switch (rdev->config.si.mem_row_size_in_kb) {
2202         case 1:
2203                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2204                 break;
2205         case 2:
2206         default:
2207                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2208                 break;
2209         case 4:
2210                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2211                 break;
2212         }
2213
2214         if ((rdev->family == CHIP_TAHITI) ||
2215             (rdev->family == CHIP_PITCAIRN)) {
2216                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2217                         switch (reg_offset) {
2218                         case 0:  /* non-AA compressed depth or any compressed stencil */
2219                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2220                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2221                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2222                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2223                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2224                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2225                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2226                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2227                                 break;
2228                         case 1:  /* 2xAA/4xAA compressed depth only */
2229                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2230                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2231                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2232                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2233                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2234                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2235                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2236                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2237                                 break;
2238                         case 2:  /* 8xAA compressed depth only */
2239                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2240                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2241                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2242                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2243                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2244                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2245                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2246                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2247                                 break;
2248                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2249                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2250                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2251                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2252                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2253                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2254                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2255                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2256                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2257                                 break;
2258                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2259                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2260                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2261                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2262                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2263                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2264                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2265                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2266                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2267                                 break;
2268                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2269                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2270                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2271                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2272                                                  TILE_SPLIT(split_equal_to_row_size) |
2273                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2274                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2275                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2276                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2277                                 break;
2278                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2279                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2280                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2281                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2282                                                  TILE_SPLIT(split_equal_to_row_size) |
2283                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2284                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2285                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2286                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2287                                 break;
2288                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2289                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2290                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2291                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2292                                                  TILE_SPLIT(split_equal_to_row_size) |
2293                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2294                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2295                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2296                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2297                                 break;
2298                         case 8:  /* 1D and 1D Array Surfaces */
2299                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2300                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2301                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2302                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2303                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2304                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2305                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2306                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2307                                 break;
2308                         case 9:  /* Displayable maps. */
2309                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2310                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2311                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2312                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2313                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2314                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2315                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2316                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2317                                 break;
2318                         case 10:  /* Display 8bpp. */
2319                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2320                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2321                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2322                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2323                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2324                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2325                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2326                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2327                                 break;
2328                         case 11:  /* Display 16bpp. */
2329                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2330                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2331                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2332                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2333                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2334                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2335                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2336                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2337                                 break;
2338                         case 12:  /* Display 32bpp. */
2339                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2340                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2341                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2342                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2343                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2344                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2345                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2346                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2347                                 break;
2348                         case 13:  /* Thin. */
2349                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2350                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2351                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2352                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2353                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2354                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2355                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2356                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2357                                 break;
2358                         case 14:  /* Thin 8 bpp. */
2359                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2360                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2361                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2362                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2363                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2364                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2365                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2366                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2367                                 break;
2368                         case 15:  /* Thin 16 bpp. */
2369                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2370                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2371                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2372                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2373                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2374                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2375                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2376                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2377                                 break;
2378                         case 16:  /* Thin 32 bpp. */
2379                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2380                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2381                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2382                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2383                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2384                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2385                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2386                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2387                                 break;
2388                         case 17:  /* Thin 64 bpp. */
2389                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2390                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2391                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2392                                                  TILE_SPLIT(split_equal_to_row_size) |
2393                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2394                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2395                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2396                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2397                                 break;
2398                         case 21:  /* 8 bpp PRT. */
2399                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2400                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2401                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2402                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2403                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2404                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2405                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2406                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2407                                 break;
2408                         case 22:  /* 16 bpp PRT */
2409                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2410                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2411                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2412                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2413                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2414                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2415                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2416                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2417                                 break;
2418                         case 23:  /* 32 bpp PRT */
2419                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2420                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2421                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2422                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2423                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2424                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2425                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2426                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2427                                 break;
2428                         case 24:  /* 64 bpp PRT */
2429                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2430                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2431                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2432                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2433                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2434                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2435                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2436                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2437                                 break;
2438                         case 25:  /* 128 bpp PRT */
2439                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2440                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2441                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2442                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2443                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2444                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2446                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2447                                 break;
2448                         default:
2449                                 gb_tile_moden = 0;
2450                                 break;
2451                         }
2452                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2453                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2454                 }
2455         } else if ((rdev->family == CHIP_VERDE) ||
2456                    (rdev->family == CHIP_OLAND) ||
2457                    (rdev->family == CHIP_HAINAN)) {
2458                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2459                         switch (reg_offset) {
2460                         case 0:  /* non-AA compressed depth or any compressed stencil */
2461                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2462                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2463                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2464                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2465                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2466                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2468                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2469                                 break;
2470                         case 1:  /* 2xAA/4xAA compressed depth only */
2471                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2472                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2473                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2474                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2475                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2476                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2478                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2479                                 break;
2480                         case 2:  /* 8xAA compressed depth only */
2481                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2482                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2483                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2484                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2485                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2486                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2488                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2489                                 break;
2490                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2491                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2492                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2493                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2494                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2495                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2496                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2498                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2499                                 break;
2500                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2501                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2502                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2503                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2504                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2505                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2506                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2507                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2508                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2509                                 break;
2510                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2511                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2512                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2513                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2514                                                  TILE_SPLIT(split_equal_to_row_size) |
2515                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2516                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2518                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2519                                 break;
2520                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2521                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2522                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2523                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2524                                                  TILE_SPLIT(split_equal_to_row_size) |
2525                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2526                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2528                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2529                                 break;
2530                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2531                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2532                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2533                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2534                                                  TILE_SPLIT(split_equal_to_row_size) |
2535                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2536                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2537                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2538                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2539                                 break;
2540                         case 8:  /* 1D and 1D Array Surfaces */
2541                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2542                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2543                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2544                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2545                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2546                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2547                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2548                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2549                                 break;
2550                         case 9:  /* Displayable maps. */
2551                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2552                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2553                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2554                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2555                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2556                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2558                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2559                                 break;
2560                         case 10:  /* Display 8bpp. */
2561                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2562                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2563                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2564                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2565                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2566                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2567                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2568                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2569                                 break;
2570                         case 11:  /* Display 16bpp. */
2571                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2572                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2573                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2574                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2575                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2576                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2577                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2578                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2579                                 break;
2580                         case 12:  /* Display 32bpp. */
2581                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2582                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2583                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2584                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2585                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2586                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2587                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2588                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2589                                 break;
2590                         case 13:  /* Thin. */
2591                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2592                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2593                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2594                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2595                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2596                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2598                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2599                                 break;
2600                         case 14:  /* Thin 8 bpp. */
2601                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2602                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2603                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2604                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2605                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2606                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2607                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2608                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2609                                 break;
2610                         case 15:  /* Thin 16 bpp. */
2611                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2612                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2613                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2614                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2615                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2616                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2618                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2619                                 break;
2620                         case 16:  /* Thin 32 bpp. */
2621                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2622                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2623                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2624                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2625                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2626                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2627                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2628                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2629                                 break;
2630                         case 17:  /* Thin 64 bpp. */
2631                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2632                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2633                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2634                                                  TILE_SPLIT(split_equal_to_row_size) |
2635                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2636                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2638                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2639                                 break;
2640                         case 21:  /* 8 bpp PRT. */
2641                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2642                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2643                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2644                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2645                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2646                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2647                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2648                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2649                                 break;
2650                         case 22:  /* 16 bpp PRT */
2651                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2652                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2653                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2654                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2655                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2656                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2657                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2658                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2659                                 break;
2660                         case 23:  /* 32 bpp PRT */
2661                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2663                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2664                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2665                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2666                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2667                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2668                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2669                                 break;
2670                         case 24:  /* 64 bpp PRT */
2671                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2672                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2673                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2674                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2675                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2676                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2677                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2678                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2679                                 break;
2680                         case 25:  /* 128 bpp PRT */
2681                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2682                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2683                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2684                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2685                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2686                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2687                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2688                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2689                                 break;
2690                         default:
2691                                 gb_tile_moden = 0;
2692                                 break;
2693                         }
2694                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2695                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2696                 }
2697         } else
2698                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2699 }
2700
2701 static void si_select_se_sh(struct radeon_device *rdev,
2702                             u32 se_num, u32 sh_num)
2703 {
2704         u32 data = INSTANCE_BROADCAST_WRITES;
2705
2706         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2707                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2708         else if (se_num == 0xffffffff)
2709                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2710         else if (sh_num == 0xffffffff)
2711                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2712         else
2713                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2714         WREG32(GRBM_GFX_INDEX, data);
2715 }
2716
2717 static u32 si_create_bitmask(u32 bit_width)
2718 {
2719         u32 i, mask = 0;
2720
2721         for (i = 0; i < bit_width; i++) {
2722                 mask <<= 1;
2723                 mask |= 1;
2724         }
2725         return mask;
2726 }
2727
2728 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2729 {
2730         u32 data, mask;
2731
2732         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2733         if (data & 1)
2734                 data &= INACTIVE_CUS_MASK;
2735         else
2736                 data = 0;
2737         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2738
2739         data >>= INACTIVE_CUS_SHIFT;
2740
2741         mask = si_create_bitmask(cu_per_sh);
2742
2743         return ~data & mask;
2744 }
2745
2746 static void si_setup_spi(struct radeon_device *rdev,
2747                          u32 se_num, u32 sh_per_se,
2748                          u32 cu_per_sh)
2749 {
2750         int i, j, k;
2751         u32 data, mask, active_cu;
2752
2753         for (i = 0; i < se_num; i++) {
2754                 for (j = 0; j < sh_per_se; j++) {
2755                         si_select_se_sh(rdev, i, j);
2756                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2757                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2758
2759                         mask = 1;
2760                         for (k = 0; k < 16; k++) {
2761                                 mask <<= k;
2762                                 if (active_cu & mask) {
2763                                         data &= ~mask;
2764                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2765                                         break;
2766                                 }
2767                         }
2768                 }
2769         }
2770         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2771 }
2772
2773 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2774                               u32 max_rb_num, u32 se_num,
2775                               u32 sh_per_se)
2776 {
2777         u32 data, mask;
2778
2779         data = RREG32(CC_RB_BACKEND_DISABLE);
2780         if (data & 1)
2781                 data &= BACKEND_DISABLE_MASK;
2782         else
2783                 data = 0;
2784         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2785
2786         data >>= BACKEND_DISABLE_SHIFT;
2787
2788         mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
2789
2790         return data & mask;
2791 }
2792
/* Build the render-backend (RB) map and program PA_SC_RASTER_CONFIG for
 * each shader engine.  First gathers the per-SE/SH disabled-RB bitmaps
 * into one word, inverts it into an enabled-RB bitmap, then consumes
 * two bits per shader array to pick a RASTER_CONFIG_RB_MAP_* encoding.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* collect every SE/SH pair's disabled-RB bits into one bitmap,
	 * TAHITI_RB_BITMAP_WIDTH_PER_SH bits per shader array */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* restore broadcast mode */
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert into an enabled-RB bitmap over all max_rb_num backends */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* program one raster config per SE; each SH consumes the low two
	 * bits of enabled_rbs and selects an RB map encoding accordingly */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1: /* only the first RB of this pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2: /* only the second RB of this pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3: /* both RBs enabled */
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2840
/* si_gpu_init - one-time setup of the 3D engine for the detected SI asic.
 *
 * Fills rdev->config.si with per-family shader/pipe/cache limits, resets
 * the HDP register block, derives the memory row size from MC_ARB_RAMCFG,
 * builds the tiling config dword, programs the address-config registers,
 * initializes the tiling mode table, render backends and SPI, and finally
 * writes the remaining 3D engine defaults.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-family hardware limits and "golden" GB_ADDR_CONFIG value */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		/* unrecognized families fall back to Verde limits */
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read but never used below */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* derive memory row size (KB) from the MC column count, capped at 4 */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* broadcast the address config to every block that needs it */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);


	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* read-modify-write with no modification: keeps the reset value */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	/* FIFO sizes from the per-family config set above */
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* disable the CB perf counters */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* read-modify-write with no modification: keeps the reset value */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* let the programming settle */
	udelay(50);
}
3097
/*
 * GPU scratch register helper functions.
 */
3101 static void si_scratch_init(struct radeon_device *rdev)
3102 {
3103         int i;
3104
3105         rdev->scratch.num_reg = 7;
3106         rdev->scratch.reg_base = SCRATCH_REG0;
3107         for (i = 0; i < rdev->scratch.num_reg; i++) {
3108                 rdev->scratch.free[i] = true;
3109                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3110         }
3111 }
3112
/* Emit a fence on @fence->ring: first flush the read caches over the
 * gart, then write the fence sequence number to the fence driver's GPU
 * address with an EVENT_WRITE_EOP packet that also raises an interrupt.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF); /* sync range size (whole space) */
	radeon_ring_write(ring, 0);          /* sync range base */
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff);	/* fence address, low 32 bits */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3139
3140 /*
3141  * IB stuff
3142  */
/* Schedule an indirect buffer (IB) on @ib->ring.  Const IBs are preceded
 * by a SWITCH_BUFFER packet; normal IBs first record the read pointer
 * the ring will reach after this submission (so a hung GPU can be
 * inspected) and are followed by a read-cache flush for the IB's vmid.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this SET_CONFIG_REG packet, 4 for
			 * the IB packet, 8 for the flush below */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for this WRITE_DATA packet, 4 for the
			 * IB packet, 8 for the flush below */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* the IB packet itself: header + 64-bit address + length/vmid */
	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3199
3200 /*
3201  * CP.
3202  */
3203 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3204 {
3205         if (enable)
3206                 WREG32(CP_ME_CNTL, 0);
3207         else {
3208                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3209                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3210                 WREG32(SCRATCH_UMSK, 0);
3211                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3212                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3213                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3214         }
3215         udelay(50);
3216 }
3217
3218 static int si_cp_load_microcode(struct radeon_device *rdev)
3219 {
3220         const __be32 *fw_data;
3221         int i;
3222
3223         if (!rdev->me_fw || !rdev->pfp_fw)
3224                 return -EINVAL;
3225
3226         si_cp_enable(rdev, false);
3227
3228         /* PFP */
3229         fw_data = (const __be32 *)rdev->pfp_fw->data;
3230         WREG32(CP_PFP_UCODE_ADDR, 0);
3231         for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3232                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3233         WREG32(CP_PFP_UCODE_ADDR, 0);
3234
3235         /* CE */
3236         fw_data = (const __be32 *)rdev->ce_fw->data;
3237         WREG32(CP_CE_UCODE_ADDR, 0);
3238         for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3239                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3240         WREG32(CP_CE_UCODE_ADDR, 0);
3241
3242         /* ME */
3243         fw_data = (const __be32 *)rdev->me_fw->data;
3244         WREG32(CP_ME_RAM_WADDR, 0);
3245         for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3246                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3247         WREG32(CP_ME_RAM_WADDR, 0);
3248
3249         WREG32(CP_PFP_UCODE_ADDR, 0);
3250         WREG32(CP_CE_UCODE_ADDR, 0);
3251         WREG32(CP_ME_RAM_WADDR, 0);
3252         WREG32(CP_ME_RAM_RADDR, 0);
3253         return 0;
3254 }
3255
3256 static int si_cp_start(struct radeon_device *rdev)
3257 {
3258         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3259         int r, i;
3260
3261         r = radeon_ring_lock(rdev, ring, 7 + 4);
3262         if (r) {
3263                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3264                 return r;
3265         }
3266         /* init the CP */
3267         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3268         radeon_ring_write(ring, 0x1);
3269         radeon_ring_write(ring, 0x0);
3270         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3271         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3272         radeon_ring_write(ring, 0);
3273         radeon_ring_write(ring, 0);
3274
3275         /* init the CE partitions */
3276         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3277         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3278         radeon_ring_write(ring, 0xc000);
3279         radeon_ring_write(ring, 0xe000);
3280         radeon_ring_unlock_commit(rdev, ring);
3281
3282         si_cp_enable(rdev, true);
3283
3284         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3285         if (r) {
3286                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3287                 return r;
3288         }
3289
3290         /* setup clear context state */
3291         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3292         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3293
3294         for (i = 0; i < si_default_size; i++)
3295                 radeon_ring_write(ring, si_default_state[i]);
3296
3297         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3298         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3299
3300         /* set clear context state */
3301         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3302         radeon_ring_write(ring, 0);
3303
3304         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3305         radeon_ring_write(ring, 0x00000316);
3306         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3307         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3308
3309         radeon_ring_unlock_commit(rdev, ring);
3310
3311         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3312                 ring = &rdev->ring[i];
3313                 r = radeon_ring_lock(rdev, ring, 2);
3314
3315                 /* clear the compute context state */
3316                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3317                 radeon_ring_write(ring, 0);
3318
3319                 radeon_ring_unlock_commit(rdev, ring);
3320         }
3321
3322         return 0;
3323 }
3324
3325 static void si_cp_fini(struct radeon_device *rdev)
3326 {
3327         struct radeon_ring *ring;
3328         si_cp_enable(rdev, false);
3329
3330         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3331         radeon_ring_fini(rdev, ring);
3332         radeon_scratch_free(rdev, ring->rptr_save_reg);
3333
3334         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3335         radeon_ring_fini(rdev, ring);
3336         radeon_scratch_free(rdev, ring->rptr_save_reg);
3337
3338         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3339         radeon_ring_fini(rdev, ring);
3340         radeon_scratch_free(rdev, ring->rptr_save_reg);
3341 }
3342
/* si_cp_resume - soft-reset the CP block and program all three ring buffers
 *
 * @rdev: radeon_device pointer
 *
 * Resets the CP (plus PA/VGT/SPI/SX, which must be reset alongside it),
 * programs size, read/write pointers, writeback address and base for the
 * gfx ring (RB0) and the two compute rings (RB1/RB2), starts the CP via
 * si_cp_start() and ring-tests each ring.
 *
 * Returns 0 on success.  A gfx ring test failure marks all three rings
 * not ready and is returned as an error; a compute ring test failure
 * only marks that ring not ready (the function still returns 0).
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
				 SOFT_RESET_PA |
				 SOFT_RESET_VGT |
				 SOFT_RESET_SPI |
				 SOFT_RESET_SX));
	RREG32(GRBM_SOFT_RESET); /* read back to post the write */
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0); /* release reset */
	RREG32(GRBM_SOFT_RESET);

	/* disable semaphore wait timeouts */
	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	/* scratch register writeback base (256-byte aligned) */
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		/* no writeback: don't let the CP update rptr in memory */
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp); /* clears RB_RPTR_WR_ENA again */

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB1_RPTR);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB2_RPTR);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		/* gfx ring failure is fatal for all CP rings */
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	/* compute ring failures are non-fatal: just mark the ring unusable */
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	return 0;
}
3478
/* si_gpu_check_soft_reset - determine which GPU blocks appear hung
 *
 * @rdev: radeon_device pointer
 *
 * Polls the GRBM/SRBM/DMA/VM status registers and builds a bitmask of
 * RADEON_RESET_* flags for every block reporting busy or pending work.
 * Callers use the mask both to detect lockups and to decide what to
 * soft-reset.  An MC-busy indication is deliberately dropped from the
 * mask (see comment at the end).  Returns the reset mask (0 = idle).
 */
static u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		/* the debug print shows the mask before the MC bit is cleared */
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3559
/* si_gpu_soft_reset - soft-reset the GPU blocks selected by @reset_mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: bitmask of RADEON_RESET_* flags (from si_gpu_check_soft_reset)
 *
 * Halts the CP and the DMA engines as needed, stops the MC while the
 * reset pulses are applied, translates the RADEON_RESET_* mask into
 * GRBM_SOFT_RESET / SRBM_SOFT_RESET bits, pulses those registers, and
 * restores the MC.  No-op when @reset_mask is 0.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* dump status before the reset for debugging */
	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	/* let the engines drain before stopping the MC */
	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* build the GRBM reset bits: any gfx/compute/CP hang resets the
	 * whole 3D pipeline */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	/* map the remaining RADEON_RESET_* flags onto SRBM/GRBM bits */
	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* pulse GRBM_SOFT_RESET: assert, wait, deassert; the extra reads
	 * post the writes over the bus */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same pulse sequence for SRBM_SOFT_RESET */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	/* dump status again so before/after can be compared in the log */
	evergreen_print_gpu_status_regs(rdev);
}
3684
3685 int si_asic_reset(struct radeon_device *rdev)
3686 {
3687         u32 reset_mask;
3688
3689         reset_mask = si_gpu_check_soft_reset(rdev);
3690
3691         if (reset_mask)
3692                 r600_set_bios_scratch_engine_hung(rdev, true);
3693
3694         si_gpu_soft_reset(rdev, reset_mask);
3695
3696         reset_mask = si_gpu_check_soft_reset(rdev);
3697
3698         if (!reset_mask)
3699                 r600_set_bios_scratch_engine_hung(rdev, false);
3700
3701         return 0;
3702 }
3703
3704 /**
3705  * si_gfx_is_lockup - Check if the GFX engine is locked up
3706  *
3707  * @rdev: radeon_device pointer
3708  * @ring: radeon_ring structure holding ring information
3709  *
3710  * Check if the GFX engine is locked up.
3711  * Returns true if the engine appears to be locked up, false if not.
3712  */
3713 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3714 {
3715         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3716
3717         if (!(reset_mask & (RADEON_RESET_GFX |
3718                             RADEON_RESET_COMPUTE |
3719                             RADEON_RESET_CP))) {
3720                 radeon_ring_lockup_update(ring);
3721                 return false;
3722         }
3723         /* force CP activities */
3724         radeon_ring_force_activity(rdev, ring);
3725         return radeon_ring_test_lockup(rdev, ring);
3726 }
3727
3728 /**
3729  * si_dma_is_lockup - Check if the DMA engine is locked up
3730  *
3731  * @rdev: radeon_device pointer
3732  * @ring: radeon_ring structure holding ring information
3733  *
3734  * Check if the async DMA engine is locked up.
3735  * Returns true if the engine appears to be locked up, false if not.
3736  */
3737 bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3738 {
3739         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3740         u32 mask;
3741
3742         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
3743                 mask = RADEON_RESET_DMA;
3744         else
3745                 mask = RADEON_RESET_DMA1;
3746
3747         if (!(reset_mask & mask)) {
3748                 radeon_ring_lockup_update(ring);
3749                 return false;
3750         }
3751         /* force ring activities */
3752         radeon_ring_force_activity(rdev, ring);
3753         return radeon_ring_test_lockup(rdev, ring);
3754 }
3755
3756 /* MC */
/* si_mc_program - program the memory controller apertures
 *
 * @rdev: radeon_device pointer
 *
 * Clears the HDP tiling registers, stops the MC (and display access to
 * it), programs the system/VRAM aperture ranges and the FB location,
 * then restores the MC and disables the VGA renderer so it cannot
 * scribble over driver-owned VRAM.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		/* NOTE(review): 0x2c14..0x2c24 look like per-surface HDP
		 * tiling config slots — no symbolic names available here */
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* MC registers must not be touched while clients are active */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs top (31:16) and base (15:0) in 16MB units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* no AGP on SI: give it an empty aperture (BOT > TOP-style disable) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
3807
3808 void si_vram_gtt_location(struct radeon_device *rdev,
3809                           struct radeon_mc *mc)
3810 {
3811         if (mc->mc_vram_size > 0xFFC0000000ULL) {
3812                 /* leave room for at least 1024M GTT */
3813                 dev_warn(rdev->dev, "limiting VRAM\n");
3814                 mc->real_vram_size = 0xFFC0000000ULL;
3815                 mc->mc_vram_size = 0xFFC0000000ULL;
3816         }
3817         radeon_vram_location(rdev, &rdev->mc, 0);
3818         rdev->mc.gtt_base_align = 0;
3819         radeon_gtt_location(rdev, mc);
3820 }
3821
3822 static int si_mc_init(struct radeon_device *rdev)
3823 {
3824         u32 tmp;
3825         int chansize, numchan;
3826
3827         /* Get VRAM informations */
3828         rdev->mc.vram_is_ddr = true;
3829         tmp = RREG32(MC_ARB_RAMCFG);
3830         if (tmp & CHANSIZE_OVERRIDE) {
3831                 chansize = 16;
3832         } else if (tmp & CHANSIZE_MASK) {
3833                 chansize = 64;
3834         } else {
3835                 chansize = 32;
3836         }
3837         tmp = RREG32(MC_SHARED_CHMAP);
3838         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3839         case 0:
3840         default:
3841                 numchan = 1;
3842                 break;
3843         case 1:
3844                 numchan = 2;
3845                 break;
3846         case 2:
3847                 numchan = 4;
3848                 break;
3849         case 3:
3850                 numchan = 8;
3851                 break;
3852         case 4:
3853                 numchan = 3;
3854                 break;
3855         case 5:
3856                 numchan = 6;
3857                 break;
3858         case 6:
3859                 numchan = 10;
3860                 break;
3861         case 7:
3862                 numchan = 12;
3863                 break;
3864         case 8:
3865                 numchan = 16;
3866                 break;
3867         }
3868         rdev->mc.vram_width = numchan * chansize;
3869         /* Could aper size report 0 ? */
3870         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3871         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3872         /* size in MB on si */
3873         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3874         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3875         rdev->mc.visible_vram_size = rdev->mc.aper_size;
3876         si_vram_gtt_location(rdev, &rdev->mc);
3877         radeon_update_bandwidth_info(rdev);
3878
3879         return 0;
3880 }
3881
3882 /*
3883  * GART
3884  */
/* si_pcie_gart_tlb_flush - flush the GART TLB for VM context 0
 *
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP cache first so page-table writes are visible to the
 * GPU, then invalidates the VM context 0 TLB.
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
3893
/* si_pcie_gart_enable - set up and enable the PCIE GART (VM contexts)
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, programs the L1 TLB and L2 cache,
 * configures VM context 0 for system (GART) mappings, points the unused
 * contexts 1-15 at the same table for now, enables their fault handling,
 * and flushes the TLB.  Returns 0 on success or a negative error code if
 * the page table is missing or cannot be pinned.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* repopulate the page table from the driver's GART state */
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* unmapped accesses in context0 land on the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): 0x15D4-0x15DC have no symbolic names here; they are
	 * cleared as part of VM setup — purpose not visible in this file */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		/* contexts 0-7 and 8-15 use two separate register banks */
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
3978
/* si_pcie_gart_disable - disable the PCIE GART and unpin the page table
 *
 * @rdev: radeon_device pointer
 *
 * Disables all VM contexts, turns off the L1 TLB and L2 cache (leaving
 * pass-through behavior for unmapped system accesses), and unpins the
 * GART table from VRAM.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
3997
/* si_pcie_gart_fini - full GART teardown
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART hardware, then frees the page table VRAM and the
 * driver-side GART bookkeeping; order matters (hw off before free).
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4004
4005 /* vm parser */
4006 static bool si_vm_reg_valid(u32 reg)
4007 {
4008         /* context regs are fine */
4009         if (reg >= 0x28000)
4010                 return true;
4011
4012         /* check config regs */
4013         switch (reg) {
4014         case GRBM_GFX_INDEX:
4015         case CP_STRMOUT_CNTL:
4016         case VGT_VTX_VECT_EJECT_REG:
4017         case VGT_CACHE_INVALIDATION:
4018         case VGT_ESGS_RING_SIZE:
4019         case VGT_GSVS_RING_SIZE:
4020         case VGT_GS_VERTEX_REUSE:
4021         case VGT_PRIMITIVE_TYPE:
4022         case VGT_INDEX_TYPE:
4023         case VGT_NUM_INDICES:
4024         case VGT_NUM_INSTANCES:
4025         case VGT_TF_RING_SIZE:
4026         case VGT_HS_OFFCHIP_PARAM:
4027         case VGT_TF_MEMORY_BASE:
4028         case PA_CL_ENHANCE:
4029         case PA_SU_LINE_STIPPLE_VALUE:
4030         case PA_SC_LINE_STIPPLE_STATE:
4031         case PA_SC_ENHANCE:
4032         case SQC_CACHES:
4033         case SPI_STATIC_THREAD_MGMT_1:
4034         case SPI_STATIC_THREAD_MGMT_2:
4035         case SPI_STATIC_THREAD_MGMT_3:
4036         case SPI_PS_MAX_WAVE_ID:
4037         case SPI_CONFIG_CNTL:
4038         case SPI_CONFIG_CNTL_1:
4039         case TA_CNTL_AUX:
4040                 return true;
4041         default:
4042                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4043                 return false;
4044         }
4045 }
4046
4047 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4048                                   u32 *ib, struct radeon_cs_packet *pkt)
4049 {
4050         switch (pkt->opcode) {
4051         case PACKET3_NOP:
4052         case PACKET3_SET_BASE:
4053         case PACKET3_SET_CE_DE_COUNTERS:
4054         case PACKET3_LOAD_CONST_RAM:
4055         case PACKET3_WRITE_CONST_RAM:
4056         case PACKET3_WRITE_CONST_RAM_OFFSET:
4057         case PACKET3_DUMP_CONST_RAM:
4058         case PACKET3_INCREMENT_CE_COUNTER:
4059         case PACKET3_WAIT_ON_DE_COUNTER:
4060         case PACKET3_CE_WRITE:
4061                 break;
4062         default:
4063                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4064                 return -EINVAL;
4065         }
4066         return 0;
4067 }
4068
4069 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4070                                    u32 *ib, struct radeon_cs_packet *pkt)
4071 {
4072         u32 idx = pkt->idx + 1;
4073         u32 idx_value = ib[idx];
4074         u32 start_reg, end_reg, reg, i;
4075         u32 command, info;
4076
4077         switch (pkt->opcode) {
4078         case PACKET3_NOP:
4079         case PACKET3_SET_BASE:
4080         case PACKET3_CLEAR_STATE:
4081         case PACKET3_INDEX_BUFFER_SIZE:
4082         case PACKET3_DISPATCH_DIRECT:
4083         case PACKET3_DISPATCH_INDIRECT:
4084         case PACKET3_ALLOC_GDS:
4085         case PACKET3_WRITE_GDS_RAM:
4086         case PACKET3_ATOMIC_GDS:
4087         case PACKET3_ATOMIC:
4088         case PACKET3_OCCLUSION_QUERY:
4089         case PACKET3_SET_PREDICATION:
4090         case PACKET3_COND_EXEC:
4091         case PACKET3_PRED_EXEC:
4092         case PACKET3_DRAW_INDIRECT:
4093         case PACKET3_DRAW_INDEX_INDIRECT:
4094         case PACKET3_INDEX_BASE:
4095         case PACKET3_DRAW_INDEX_2:
4096         case PACKET3_CONTEXT_CONTROL:
4097         case PACKET3_INDEX_TYPE:
4098         case PACKET3_DRAW_INDIRECT_MULTI:
4099         case PACKET3_DRAW_INDEX_AUTO:
4100         case PACKET3_DRAW_INDEX_IMMD:
4101         case PACKET3_NUM_INSTANCES:
4102         case PACKET3_DRAW_INDEX_MULTI_AUTO:
4103         case PACKET3_STRMOUT_BUFFER_UPDATE:
4104         case PACKET3_DRAW_INDEX_OFFSET_2:
4105         case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4106         case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4107         case PACKET3_MPEG_INDEX:
4108         case PACKET3_WAIT_REG_MEM:
4109         case PACKET3_MEM_WRITE:
4110         case PACKET3_PFP_SYNC_ME:
4111         case PACKET3_SURFACE_SYNC:
4112         case PACKET3_EVENT_WRITE:
4113         case PACKET3_EVENT_WRITE_EOP:
4114         case PACKET3_EVENT_WRITE_EOS:
4115         case PACKET3_SET_CONTEXT_REG:
4116         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4117         case PACKET3_SET_SH_REG:
4118         case PACKET3_SET_SH_REG_OFFSET:
4119         case PACKET3_INCREMENT_DE_COUNTER:
4120         case PACKET3_WAIT_ON_CE_COUNTER:
4121         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4122         case PACKET3_ME_WRITE:
4123                 break;
4124         case PACKET3_COPY_DATA:
4125                 if ((idx_value & 0xf00) == 0) {
4126                         reg = ib[idx + 3] * 4;
4127                         if (!si_vm_reg_valid(reg))
4128                                 return -EINVAL;
4129                 }
4130                 break;
4131         case PACKET3_WRITE_DATA:
4132                 if ((idx_value & 0xf00) == 0) {
4133                         start_reg = ib[idx + 1] * 4;
4134                         if (idx_value & 0x10000) {
4135                                 if (!si_vm_reg_valid(start_reg))
4136                                         return -EINVAL;
4137                         } else {
4138                                 for (i = 0; i < (pkt->count - 2); i++) {
4139                                         reg = start_reg + (4 * i);
4140                                         if (!si_vm_reg_valid(reg))
4141                                                 return -EINVAL;
4142                                 }
4143                         }
4144                 }
4145                 break;
4146         case PACKET3_COND_WRITE:
4147                 if (idx_value & 0x100) {
4148                         reg = ib[idx + 5] * 4;
4149                         if (!si_vm_reg_valid(reg))
4150                                 return -EINVAL;
4151                 }
4152                 break;
4153         case PACKET3_COPY_DW:
4154                 if (idx_value & 0x2) {
4155                         reg = ib[idx + 3] * 4;
4156                         if (!si_vm_reg_valid(reg))
4157                                 return -EINVAL;
4158                 }
4159                 break;
4160         case PACKET3_SET_CONFIG_REG:
4161                 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4162                 end_reg = 4 * pkt->count + start_reg - 4;
4163                 if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4164                     (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4165                     (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4166                         DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4167                         return -EINVAL;
4168                 }
4169                 for (i = 0; i < pkt->count; i++) {
4170                         reg = start_reg + (4 * i);
4171                         if (!si_vm_reg_valid(reg))
4172                                 return -EINVAL;
4173                 }
4174                 break;
4175         case PACKET3_CP_DMA:
4176                 command = ib[idx + 4];
4177                 info = ib[idx + 1];
4178                 if (command & PACKET3_CP_DMA_CMD_SAS) {
4179                         /* src address space is register */
4180                         if (((info & 0x60000000) >> 29) == 0) {
4181                                 start_reg = idx_value << 2;
4182                                 if (command & PACKET3_CP_DMA_CMD_SAIC) {
4183                                         reg = start_reg;
4184                                         if (!si_vm_reg_valid(reg)) {
4185                                                 DRM_ERROR("CP DMA Bad SRC register\n");
4186                                                 return -EINVAL;
4187                                         }
4188                                 } else {
4189                                         for (i = 0; i < (command & 0x1fffff); i++) {
4190                                                 reg = start_reg + (4 * i);
4191                                                 if (!si_vm_reg_valid(reg)) {
4192                                                         DRM_ERROR("CP DMA Bad SRC register\n");
4193                                                         return -EINVAL;
4194                                                 }
4195                                         }
4196                                 }
4197                         }
4198                 }
4199                 if (command & PACKET3_CP_DMA_CMD_DAS) {
4200                         /* dst address space is register */
4201                         if (((info & 0x00300000) >> 20) == 0) {
4202                                 start_reg = ib[idx + 2];
4203                                 if (command & PACKET3_CP_DMA_CMD_DAIC) {
4204                                         reg = start_reg;
4205                                         if (!si_vm_reg_valid(reg)) {
4206                                                 DRM_ERROR("CP DMA Bad DST register\n");
4207                                                 return -EINVAL;
4208                                         }
4209                                 } else {
4210                                         for (i = 0; i < (command & 0x1fffff); i++) {
4211                                                 reg = start_reg + (4 * i);
4212                                                 if (!si_vm_reg_valid(reg)) {
4213                                                         DRM_ERROR("CP DMA Bad DST register\n");
4214                                                         return -EINVAL;
4215                                                 }
4216                                         }
4217                                 }
4218                         }
4219                 }
4220                 break;
4221         default:
4222                 DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4223                 return -EINVAL;
4224         }
4225         return 0;
4226 }
4227
/**
 * si_vm_packet3_compute_check - validate a PACKET3 from a VM compute IB
 *
 * @rdev: radeon_device pointer
 * @ib: IB dword stream being checked
 * @pkt: decoded packet header (idx, opcode, count)
 *
 * Whitelist check for PACKET3 opcodes submitted to the compute rings by
 * a VM (userspace) client.  Opcodes that can write registers are only
 * accepted when every destination register passes si_vm_reg_valid().
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	u32 idx = pkt->idx + 1; /* first dword after the packet header */
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	/* opcodes that cannot reach privileged registers: always allowed */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dst-sel 0 (bits 11:8 clear) targets a register;
		 * dword 3 carries the dword-aligned register offset */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dst-sel 0 writes registers starting at the offset in dword 1 */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* one-reg-write mode: everything lands on one register */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* NOTE(review): assumes pkt->count >= 2;
				 * a smaller count would wrap this unsigned bound */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* write-space register (bit 8): dword 5 holds the register offset */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* dst is a register when bit 1 is set; dword 3 holds the offset */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4309
/**
 * si_ib_parse - validate an indirect buffer submitted by a VM client (SI)
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to validate
 *
 * Walks every packet in @ib and checks it against the per-ring PACKET3
 * whitelist (CE, GFX or compute).  Type-0 (raw register write) packets
 * and unknown packet types are rejected outright.
 * Returns 0 on success, -EINVAL if any packet is not allowed.
 */
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int ret = 0;
	u32 idx = 0;
	struct radeon_cs_packet pkt;

	do {
		pkt.idx = idx;
		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
		pkt.one_reg_wr = 0;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			dev_err(rdev->dev, "Packet0 not allowed!\n");
			ret = -EINVAL;
			break;
		case RADEON_PACKET_TYPE2:
			/* type-2 is a single-dword filler/NOP packet */
			idx += 1;
			break;
		case RADEON_PACKET_TYPE3:
			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
			if (ib->is_const_ib)
				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
			else {
				switch (ib->ring) {
				case RADEON_RING_TYPE_GFX_INDEX:
					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
					break;
				case CAYMAN_RING_TYPE_CP1_INDEX:
				case CAYMAN_RING_TYPE_CP2_INDEX:
					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
					break;
				default:
					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
					ret = -EINVAL;
					break;
				}
			}
			/* header dword plus count+1 payload dwords */
			idx += pkt.count + 2;
			break;
		default:
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
	} while (idx < ib->length_dw);

	return ret;
}
4361
4362 /*
4363  * vm
4364  */
4365 int si_vm_init(struct radeon_device *rdev)
4366 {
4367         /* number of VMs */
4368         rdev->vm_manager.nvm = 16;
4369         /* base offset of vram pages */
4370         rdev->vm_manager.vram_base_offset = 0;
4371
4372         return 0;
4373 }
4374
/* si_vm_fini - no per-ASIC VM teardown is needed on SI */
void si_vm_fini(struct radeon_device *rdev)
{
}
4378
4379 /**
4380  * si_vm_set_page - update the page tables using the CP
4381  *
4382  * @rdev: radeon_device pointer
4383  * @ib: indirect buffer to fill with commands
4384  * @pe: addr of the page entry
4385  * @addr: dst addr to write into pe
4386  * @count: number of page entries to update
4387  * @incr: increase next addr by incr bytes
4388  * @flags: access flags
4389  *
4390  * Update the page tables using the CP (SI).
4391  */
void si_vm_set_page(struct radeon_device *rdev,
		    struct radeon_ib *ib,
		    uint64_t pe,
		    uint64_t addr, unsigned count,
		    uint32_t incr, uint32_t flags)
{
	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
	uint64_t value;
	unsigned ndw;

	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
		/* CP path: WRITE_DATA packets, 2 header dwords + 2 dwords per PTE */
		while (count) {
			ndw = 2 + count * 2;
			if (ndw > 0x3FFE)
				ndw = 0x3FFE; /* cap at max WRITE_DATA payload */

			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
					WRITE_DATA_DST_SEL(1));
			ib->ptr[ib->length_dw++] = pe;
			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
			/* emit one 64-bit PTE per iteration */
			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
				if (flags & RADEON_VM_PAGE_SYSTEM) {
					/* system page: translate through the GART */
					value = radeon_vm_map_gart(rdev, addr);
					value &= 0xFFFFFFFFFFFFF000ULL;
				} else if (flags & RADEON_VM_PAGE_VALID) {
					value = addr;
				} else {
					value = 0;
				}
				addr += incr;
				value |= r600_flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
			}
		}
	} else {
		/* DMA */
		if (flags & RADEON_VM_PAGE_SYSTEM) {
			while (count) {
				ndw = count * 2;
				if (ndw > 0xFFFFE)
					ndw = 0xFFFFE; /* cap at max DMA write size */

				/* for non-physically contiguous pages (system) */
				ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
				ib->ptr[ib->length_dw++] = pe;
				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
					if (flags & RADEON_VM_PAGE_SYSTEM) {
						value = radeon_vm_map_gart(rdev, addr);
						value &= 0xFFFFFFFFFFFFF000ULL;
					} else if (flags & RADEON_VM_PAGE_VALID) {
						value = addr;
					} else {
						value = 0;
					}
					addr += incr;
					value |= r600_flags;
					ib->ptr[ib->length_dw++] = value;
					ib->ptr[ib->length_dw++] = upper_32_bits(value);
				}
			}
		} else {
			while (count) {
				ndw = count * 2;
				if (ndw > 0xFFFFE)
					ndw = 0xFFFFE;

				if (flags & RADEON_VM_PAGE_VALID)
					value = addr;
				else
					value = 0;
				/* for physically contiguous pages (vram):
				 * one PTE_PDE packet writes ndw/2 entries,
				 * the DMA engine applies incr itself */
				ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
				ib->ptr[ib->length_dw++] = pe; /* dst addr */
				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
				ib->ptr[ib->length_dw++] = 0;
				ib->ptr[ib->length_dw++] = value; /* value */
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
				ib->ptr[ib->length_dw++] = incr; /* increment size */
				ib->ptr[ib->length_dw++] = 0;
				pe += ndw * 4;
				addr += (ndw / 2) * incr;
				count -= ndw / 2;
			}
		}
		/* pad the DMA IB to a multiple of 8 dwords with NOPs */
		while (ib->length_dw & 0x7)
			ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
	}
}
4484
/**
 * si_vm_flush - flush the TLB for a VM via the CP ring (SI)
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit on
 * @vm: VM whose page directory changed (NULL is a no-op)
 *
 * Points the VM context at the new page directory, flushes the HDP
 * cache and requests a TLB invalidate for this VM id, then syncs
 * PFP to ME so stale prefetches cannot be used.
 */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
4527
/**
 * si_dma_vm_flush - flush the TLB for a VM via the DMA ring (SI)
 *
 * @rdev: radeon_device pointer
 * @ridx: DMA ring index to emit on
 * @vm: VM whose page directory changed (NULL is a no-op)
 *
 * Same sequence as si_vm_flush() but expressed as SRBM register
 * writes on the async DMA ring: new page directory base, HDP cache
 * flush, then a TLB invalidate request for this VM id.
 */
void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
	} else {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
	}
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-7 are the VM contexts0-7 */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm->id);
}
4553
4554 /*
4555  * RLC
4556  */
4557 void si_rlc_fini(struct radeon_device *rdev)
4558 {
4559         int r;
4560
4561         /* save restore block */
4562         if (rdev->rlc.save_restore_obj) {
4563                 r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
4564                 if (unlikely(r != 0))
4565                         dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
4566                 radeon_bo_unpin(rdev->rlc.save_restore_obj);
4567                 radeon_bo_unreserve(rdev->rlc.save_restore_obj);
4568
4569                 radeon_bo_unref(&rdev->rlc.save_restore_obj);
4570                 rdev->rlc.save_restore_obj = NULL;
4571         }
4572
4573         /* clear state block */
4574         if (rdev->rlc.clear_state_obj) {
4575                 r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
4576                 if (unlikely(r != 0))
4577                         dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
4578                 radeon_bo_unpin(rdev->rlc.clear_state_obj);
4579                 radeon_bo_unreserve(rdev->rlc.clear_state_obj);
4580
4581                 radeon_bo_unref(&rdev->rlc.clear_state_obj);
4582                 rdev->rlc.clear_state_obj = NULL;
4583         }
4584 }
4585
4586 int si_rlc_init(struct radeon_device *rdev)
4587 {
4588         int r, i;
4589         volatile u32 *dst_ptr;
4590
4591         /* save restore block */
4592         if (rdev->rlc.save_restore_obj == NULL) {
4593                 r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
4594                                      RADEON_GEM_DOMAIN_VRAM, NULL,
4595                                      &rdev->rlc.save_restore_obj);
4596                 if (r) {
4597                         dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
4598                         return r;
4599                 }
4600         }
4601
4602         r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
4603         if (unlikely(r != 0)) {
4604                 si_rlc_fini(rdev);
4605                 return r;
4606         }
4607         r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
4608                           &rdev->rlc.save_restore_gpu_addr);
4609         if (r) {
4610                 radeon_bo_unreserve(rdev->rlc.save_restore_obj);
4611                 dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
4612                 si_rlc_fini(rdev);
4613                 return r;
4614         }
4615
4616         if (rdev->family == CHIP_VERDE) {
4617                 r = radeon_bo_kmap(rdev->rlc.save_restore_obj, (void **)&rdev->rlc.sr_ptr);
4618                 if (r) {
4619                         dev_warn(rdev->dev, "(%d) map RLC sr bo failed\n", r);
4620                         si_rlc_fini(rdev);
4621                 return r;
4622                 }
4623                 /* write the sr buffer */
4624                 dst_ptr = rdev->rlc.sr_ptr;
4625                 for (i = 0; i < ARRAY_SIZE(verde_rlc_save_restore_register_list); i++) {
4626                         dst_ptr[i] = verde_rlc_save_restore_register_list[i];
4627                 }
4628                 radeon_bo_kunmap(rdev->rlc.save_restore_obj);
4629         }
4630         radeon_bo_unreserve(rdev->rlc.save_restore_obj);
4631
4632         /* clear state block */
4633         if (rdev->rlc.clear_state_obj == NULL) {
4634                 r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
4635                                      RADEON_GEM_DOMAIN_VRAM, NULL,
4636                                      &rdev->rlc.clear_state_obj);
4637                 if (r) {
4638                         dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
4639                         si_rlc_fini(rdev);
4640                         return r;
4641                 }
4642         }
4643         r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
4644         if (unlikely(r != 0)) {
4645                 si_rlc_fini(rdev);
4646                 return r;
4647         }
4648         r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
4649                           &rdev->rlc.clear_state_gpu_addr);
4650         radeon_bo_unreserve(rdev->rlc.clear_state_obj);
4651         if (r) {
4652                 dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
4653                 si_rlc_fini(rdev);
4654                 return r;
4655         }
4656
4657         return 0;
4658 }
4659
/* Enable or disable the GUI idle (context busy/empty) interrupts on
 * CP ring 0.  On disable, also poll RLC_STAT until - of the watched
 * bits - only GFX_CLOCK_STATUS and GFX_POWER_STATUS remain set
 * (i.e. the RLC is no longer busy), bounded by rdev->usec_timeout. */
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable)
{
	u32 tmp = RREG32(CP_INT_CNTL_RING0);
	u32 mask;
	int i;

	if (enable)
		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	else
		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);

	if (!enable) {
		/* read a gfx register */
		tmp = RREG32(DB_DEPTH_INFO);

		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
				break;
			udelay(1);
		}
	}
}
4685
4686 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
4687 {
4688         int i;
4689
4690         for (i = 0; i < rdev->usec_timeout; i++) {
4691                 if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
4692                         break;
4693                 udelay(1);
4694         }
4695
4696         for (i = 0; i < rdev->usec_timeout; i++) {
4697                 if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
4698                         break;
4699                 udelay(1);
4700         }
4701 }
4702
/* Halt the RLC: clear RLC_CNTL, mask the GUI idle interrupts (which
 * also waits for the GFX block to settle), then wait for the RLC
 * serdes masters to go idle. */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	si_enable_gui_idle_interrupt(rdev, false);

	si_wait_for_rlc_serdes(rdev);
}
4711
/* Start the RLC and re-enable the GUI idle interrupts, then give the
 * RLC a short delay to come up. */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	si_enable_gui_idle_interrupt(rdev, true);

	udelay(50);
}
4720
4721 static bool si_lbpw_supported(struct radeon_device *rdev)
4722 {
4723         u32 tmp;
4724
4725         /* Enable LBPW only for DDR3 */
4726         tmp = RREG32(MC_SEQ_MISC0);
4727         if ((tmp & 0xF0000000) == 0xB0000000)
4728                 return true;
4729         return false;
4730 }
4731
4732 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
4733 {
4734         u32 tmp;
4735
4736         tmp = RREG32(RLC_LB_CNTL);
4737         if (enable)
4738                 tmp |= LOAD_BALANCE_ENABLE;
4739         else
4740                 tmp &= ~LOAD_BALANCE_ENABLE;
4741         WREG32(RLC_LB_CNTL, tmp);
4742
4743         if (!enable) {
4744                 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4745                 WREG32(SPI_LB_CU_MASK, 0x00ff);
4746         }
4747 }
4748
/**
 * si_rlc_resume - load the RLC microcode and start the RLC
 *
 * @rdev: radeon_device pointer
 *
 * Stops the RLC, programs its base registers (including the
 * save/restore and clear state buffer addresses set up by
 * si_rlc_init()), uploads the RLC ucode word by word, enables
 * load balancing where supported and restarts the RLC.
 * Returns 0 on success, -EINVAL if no RLC firmware is loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	/* reset ring list and load balancing state */
	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	/* point the RLC at the power gating buffers (256-byte aligned) */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* upload the big-endian firmware image one dword at a time */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
		WREG32(RLC_UCODE_ADDR, i);
		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
4785
4786 static void si_enable_interrupts(struct radeon_device *rdev)
4787 {
4788         u32 ih_cntl = RREG32(IH_CNTL);
4789         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4790
4791         ih_cntl |= ENABLE_INTR;
4792         ih_rb_cntl |= IH_RB_ENABLE;
4793         WREG32(IH_CNTL, ih_cntl);
4794         WREG32(IH_RB_CNTL, ih_rb_cntl);
4795         rdev->ih.enabled = true;
4796 }
4797
4798 static void si_disable_interrupts(struct radeon_device *rdev)
4799 {
4800         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4801         u32 ih_cntl = RREG32(IH_CNTL);
4802
4803         ih_rb_cntl &= ~IH_RB_ENABLE;
4804         ih_cntl &= ~ENABLE_INTR;
4805         WREG32(IH_RB_CNTL, ih_rb_cntl);
4806         WREG32(IH_CNTL, ih_cntl);
4807         /* set rptr, wptr to 0 */
4808         WREG32(IH_RB_RPTR, 0);
4809         WREG32(IH_RB_WPTR, 0);
4810         rdev->ih.enabled = false;
4811         rdev->ih.rptr = 0;
4812 }
4813
/* Mask every interrupt source this ASIC exposes: CP rings, both DMA
 * engines, GRBM, crtc vblank/vline and pageflip sources for each
 * populated crtc, and (on ASICs with display) DAC autodetect and hpd -
 * preserving only the hpd polarity bits. */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* CP: keep only the context busy/empty bits on ring 0 */
	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	/* mask the trap interrupt on both DMA engines */
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	/* crtc vblank/vline interrupts */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* pageflip interrupts */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DACA_AUTODETECT_INT_CONTROL, 0);

		/* keep the configured hpd polarity, mask everything else */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
4869
/**
 * si_irq_init - init and enable the interrupt ring
 * @rdev: radeon_device pointer
 *
 * Allocates the IH (interrupt handler) ring buffer, brings up the RLC,
 * programs the IH ring control registers (base, size, writeback address),
 * then enables interrupt delivery.  Must run before any interrupt source
 * is unmasked via si_irq_set().
 *
 * Returns 0 on success, or the error from ring allocation / RLC resume.
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		/* RLC failed to come up; release the IH ring we just allocated */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = drm_order(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
4940
/**
 * si_irq_set - program which interrupt sources are enabled
 * @rdev: radeon_device pointer
 *
 * Builds up the enable masks for the CP rings, DMA engines, per-crtc
 * vblank, and hotplug sources from the software state in rdev->irq,
 * then writes them to the hardware in one pass.  If the IH ring is not
 * enabled, all sources are forced off instead.
 *
 * Returns 0 on success, -EINVAL if no IRQ handler is installed.
 */
int si_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
	u32 grbm_int_cntl = 0;
	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
	u32 dma_cntl, dma_cntl1;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* NODCE asics have no display engine, so no hotplug registers */
	if (!ASIC_IS_NODCE(rdev)) {
		/* start from current hw state with the enable bit cleared */
		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
	}

	/* start with the DMA trap disabled; re-enable below if requested */
	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}
	/* vblank on a crtc is wanted either for the vblank irq or a pending pageflip */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("si_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("si_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("si_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("si_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("si_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("si_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("si_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("si_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("si_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("si_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("si_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("si_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	/* commit the computed masks to the hardware */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DC_HPD1_INT_CONTROL, hpd1);
		WREG32(DC_HPD2_INT_CONTROL, hpd2);
		WREG32(DC_HPD3_INT_CONTROL, hpd3);
		WREG32(DC_HPD4_INT_CONTROL, hpd4);
		WREG32(DC_HPD5_INT_CONTROL, hpd5);
		WREG32(DC_HPD6_INT_CONTROL, hpd6);
	}

	return 0;
}
5098
5099 static inline void si_irq_ack(struct radeon_device *rdev)
5100 {
5101         u32 tmp;
5102
5103         if (ASIC_IS_NODCE(rdev))
5104                 return;
5105
5106         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5107         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5108         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5109         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5110         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5111         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5112         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
5113         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
5114         if (rdev->num_crtc >= 4) {
5115                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
5116                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
5117         }
5118         if (rdev->num_crtc >= 6) {
5119                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
5120                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
5121         }
5122
5123         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
5124                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5125         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
5126                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5127         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
5128                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5129         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
5130                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5131         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5132                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5133         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5134                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5135
5136         if (rdev->num_crtc >= 4) {
5137                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
5138                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5139                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
5140                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5141                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5142                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5143                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5144                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5145                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5146                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5147                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5148                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5149         }
5150
5151         if (rdev->num_crtc >= 6) {
5152                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
5153                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5154                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
5155                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5156                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5157                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5158                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5159                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5160                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5161                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5162                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5163                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5164         }
5165
5166         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
5167                 tmp = RREG32(DC_HPD1_INT_CONTROL);
5168                 tmp |= DC_HPDx_INT_ACK;
5169                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5170         }
5171         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
5172                 tmp = RREG32(DC_HPD2_INT_CONTROL);
5173                 tmp |= DC_HPDx_INT_ACK;
5174                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5175         }
5176         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5177                 tmp = RREG32(DC_HPD3_INT_CONTROL);
5178                 tmp |= DC_HPDx_INT_ACK;
5179                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5180         }
5181         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5182                 tmp = RREG32(DC_HPD4_INT_CONTROL);
5183                 tmp |= DC_HPDx_INT_ACK;
5184                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5185         }
5186         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5187                 tmp = RREG32(DC_HPD5_INT_CONTROL);
5188                 tmp |= DC_HPDx_INT_ACK;
5189                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5190         }
5191         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5192                 tmp = RREG32(DC_HPD5_INT_CONTROL);
5193                 tmp |= DC_HPDx_INT_ACK;
5194                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5195         }
5196 }
5197
/**
 * si_irq_disable - disable interrupt delivery and quiesce pending sources
 * @rdev: radeon_device pointer
 *
 * Turns off the IH, waits for in-flight interrupts to land, acks anything
 * still pending, then forces every source's enable state to disabled.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
5206
/**
 * si_irq_suspend - disable interrupts and stop the RLC for suspend
 * @rdev: radeon_device pointer
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
5212
/**
 * si_irq_fini - tear down the interrupt handling path
 * @rdev: radeon_device pointer
 *
 * Suspends interrupt processing, then frees the IH ring buffer.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
5218
5219 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
5220 {
5221         u32 wptr, tmp;
5222
5223         if (rdev->wb.enabled)
5224                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5225         else
5226                 wptr = RREG32(IH_RB_WPTR);
5227
5228         if (wptr & RB_OVERFLOW) {
5229                 /* When a ring buffer overflow happen start parsing interrupt
5230                  * from the last not overwritten vector (wptr + 16). Hopefully
5231                  * this should allow us to catchup.
5232                  */
5233                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5234                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
5235                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5236                 tmp = RREG32(IH_RB_CNTL);
5237                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
5238                 WREG32(IH_RB_CNTL, tmp);
5239         }
5240         return (wptr & rdev->ih.ptr_mask);
5241 }
5242
5243 /*        SI IV Ring
5244  * Each IV ring entry is 128 bits:
5245  * [7:0]    - interrupt source id
5246  * [31:8]   - reserved
5247  * [59:32]  - interrupt source data
5248  * [63:60]  - reserved
5249  * [71:64]  - RINGID
5250  * [79:72]  - VMID
5251  * [127:80] - reserved
5252  */
5253 int si_irq_process(struct radeon_device *rdev)
5254 {
5255         u32 wptr;
5256         u32 rptr;
5257         u32 src_id, src_data, ring_id;
5258         u32 ring_index;
5259         bool queue_hotplug = false;
5260
5261         if (!rdev->ih.enabled || rdev->shutdown)
5262                 return IRQ_NONE;
5263
5264         wptr = si_get_ih_wptr(rdev);
5265
5266 restart_ih:
5267         /* is somebody else already processing irqs? */
5268         if (atomic_xchg(&rdev->ih.lock, 1))
5269                 return IRQ_NONE;
5270
5271         rptr = rdev->ih.rptr;
5272         DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
5273
5274         /* Order reading of wptr vs. reading of IH ring data */
5275         rmb();
5276
5277         /* display interrupts */
5278         si_irq_ack(rdev);
5279
5280         while (rptr != wptr) {
5281                 /* wptr/rptr are in bytes! */
5282                 ring_index = rptr / 4;
5283                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
5284                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
5285                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
5286
5287                 switch (src_id) {
5288                 case 1: /* D1 vblank/vline */
5289                         switch (src_data) {
5290                         case 0: /* D1 vblank */
5291                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
5292                                         if (rdev->irq.crtc_vblank_int[0]) {
5293                                                 drm_handle_vblank(rdev->ddev, 0);
5294                                                 rdev->pm.vblank_sync = true;
5295                                                 wake_up(&rdev->irq.vblank_queue);
5296                                         }
5297                                         if (atomic_read(&rdev->irq.pflip[0]))
5298                                                 radeon_crtc_handle_flip(rdev, 0);
5299                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
5300                                         DRM_DEBUG("IH: D1 vblank\n");
5301                                 }
5302                                 break;
5303                         case 1: /* D1 vline */
5304                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
5305                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
5306                                         DRM_DEBUG("IH: D1 vline\n");
5307                                 }
5308                                 break;
5309                         default:
5310                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5311                                 break;
5312                         }
5313                         break;
5314                 case 2: /* D2 vblank/vline */
5315                         switch (src_data) {
5316                         case 0: /* D2 vblank */
5317                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
5318                                         if (rdev->irq.crtc_vblank_int[1]) {
5319                                                 drm_handle_vblank(rdev->ddev, 1);
5320                                                 rdev->pm.vblank_sync = true;
5321                                                 wake_up(&rdev->irq.vblank_queue);
5322                                         }
5323                                         if (atomic_read(&rdev->irq.pflip[1]))
5324                                                 radeon_crtc_handle_flip(rdev, 1);
5325                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
5326                                         DRM_DEBUG("IH: D2 vblank\n");
5327                                 }
5328                                 break;
5329                         case 1: /* D2 vline */
5330                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
5331                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
5332                                         DRM_DEBUG("IH: D2 vline\n");
5333                                 }
5334                                 break;
5335                         default:
5336                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5337                                 break;
5338                         }
5339                         break;
5340                 case 3: /* D3 vblank/vline */
5341                         switch (src_data) {
5342                         case 0: /* D3 vblank */
5343                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
5344                                         if (rdev->irq.crtc_vblank_int[2]) {
5345                                                 drm_handle_vblank(rdev->ddev, 2);
5346                                                 rdev->pm.vblank_sync = true;
5347                                                 wake_up(&rdev->irq.vblank_queue);
5348                                         }
5349                                         if (atomic_read(&rdev->irq.pflip[2]))
5350                                                 radeon_crtc_handle_flip(rdev, 2);
5351                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
5352                                         DRM_DEBUG("IH: D3 vblank\n");
5353                                 }
5354                                 break;
5355                         case 1: /* D3 vline */
5356                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
5357                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
5358                                         DRM_DEBUG("IH: D3 vline\n");
5359                                 }
5360                                 break;
5361                         default:
5362                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5363                                 break;
5364                         }
5365                         break;
5366                 case 4: /* D4 vblank/vline */
5367                         switch (src_data) {
5368                         case 0: /* D4 vblank */
5369                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
5370                                         if (rdev->irq.crtc_vblank_int[3]) {
5371                                                 drm_handle_vblank(rdev->ddev, 3);
5372                                                 rdev->pm.vblank_sync = true;
5373                                                 wake_up(&rdev->irq.vblank_queue);
5374                                         }
5375                                         if (atomic_read(&rdev->irq.pflip[3]))
5376                                                 radeon_crtc_handle_flip(rdev, 3);
5377                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
5378                                         DRM_DEBUG("IH: D4 vblank\n");
5379                                 }
5380                                 break;
5381                         case 1: /* D4 vline */
5382                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
5383                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
5384                                         DRM_DEBUG("IH: D4 vline\n");
5385                                 }
5386                                 break;
5387                         default:
5388                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5389                                 break;
5390                         }
5391                         break;
5392                 case 5: /* D5 vblank/vline */
5393                         switch (src_data) {
5394                         case 0: /* D5 vblank */
5395                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
5396                                         if (rdev->irq.crtc_vblank_int[4]) {
5397                                                 drm_handle_vblank(rdev->ddev, 4);
5398                                                 rdev->pm.vblank_sync = true;
5399                                                 wake_up(&rdev->irq.vblank_queue);
5400                                         }
5401                                         if (atomic_read(&rdev->irq.pflip[4]))
5402                                                 radeon_crtc_handle_flip(rdev, 4);
5403                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
5404                                         DRM_DEBUG("IH: D5 vblank\n");
5405                                 }
5406                                 break;
5407                         case 1: /* D5 vline */
5408                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
5409                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
5410                                         DRM_DEBUG("IH: D5 vline\n");
5411                                 }
5412                                 break;
5413                         default:
5414                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5415                                 break;
5416                         }
5417                         break;
5418                 case 6: /* D6 vblank/vline */
5419                         switch (src_data) {
5420                         case 0: /* D6 vblank */
5421                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
5422                                         if (rdev->irq.crtc_vblank_int[5]) {
5423                                                 drm_handle_vblank(rdev->ddev, 5);
5424                                                 rdev->pm.vblank_sync = true;
5425                                                 wake_up(&rdev->irq.vblank_queue);
5426                                         }
5427                                         if (atomic_read(&rdev->irq.pflip[5]))
5428                                                 radeon_crtc_handle_flip(rdev, 5);
5429                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
5430                                         DRM_DEBUG("IH: D6 vblank\n");
5431                                 }
5432                                 break;
5433                         case 1: /* D6 vline */
5434                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
5435                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
5436                                         DRM_DEBUG("IH: D6 vline\n");
5437                                 }
5438                                 break;
5439                         default:
5440                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5441                                 break;
5442                         }
5443                         break;
5444                 case 42: /* HPD hotplug */
5445                         switch (src_data) {
5446                         case 0:
5447                                 if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
5448                                         rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
5449                                         queue_hotplug = true;
5450                                         DRM_DEBUG("IH: HPD1\n");
5451                                 }
5452                                 break;
5453                         case 1:
5454                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
5455                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
5456                                         queue_hotplug = true;
5457                                         DRM_DEBUG("IH: HPD2\n");
5458                                 }
5459                                 break;
5460                         case 2:
5461                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5462                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
5463                                         queue_hotplug = true;
5464                                         DRM_DEBUG("IH: HPD3\n");
5465                                 }
5466                                 break;
5467                         case 3:
5468                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5469                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
5470                                         queue_hotplug = true;
5471                                         DRM_DEBUG("IH: HPD4\n");
5472                                 }
5473                                 break;
5474                         case 4:
5475                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5476                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
5477                                         queue_hotplug = true;
5478                                         DRM_DEBUG("IH: HPD5\n");
5479                                 }
5480                                 break;
5481                         case 5:
5482                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5483                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
5484                                         queue_hotplug = true;
5485                                         DRM_DEBUG("IH: HPD6\n");
5486                                 }
5487                                 break;
5488                         default:
5489                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5490                                 break;
5491                         }
5492                         break;
5493                 case 146:
5494                 case 147:
5495                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
5496                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5497                                 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5498                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5499                                 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5500                         /* reset addr and status */
5501                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
5502                         break;
5503                 case 176: /* RINGID0 CP_INT */
5504                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5505                         break;
5506                 case 177: /* RINGID1 CP_INT */
5507                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5508                         break;
5509                 case 178: /* RINGID2 CP_INT */
5510                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
5511                         break;
5512                 case 181: /* CP EOP event */
5513                         DRM_DEBUG("IH: CP EOP\n");
5514                         switch (ring_id) {
5515                         case 0:
5516                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5517                                 break;
5518                         case 1:
5519                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5520                                 break;
5521                         case 2:
5522                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
5523                                 break;
5524                         }
5525                         break;
5526                 case 224: /* DMA trap event */
5527                         DRM_DEBUG("IH: DMA trap\n");
5528                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
5529                         break;
5530                 case 233: /* GUI IDLE */
5531                         DRM_DEBUG("IH: GUI idle\n");
5532                         break;
5533                 case 244: /* DMA trap event */
5534                         DRM_DEBUG("IH: DMA1 trap\n");
5535                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5536                         break;
5537                 default:
5538                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5539                         break;
5540                 }
5541
5542                 /* wptr/rptr are in bytes! */
5543                 rptr += 16;
5544                 rptr &= rdev->ih.ptr_mask;
5545         }
5546         if (queue_hotplug)
5547                 schedule_work(&rdev->hotplug_work);
5548         rdev->ih.rptr = rptr;
5549         WREG32(IH_RB_RPTR, rdev->ih.rptr);
5550         atomic_set(&rdev->ih.lock, 0);
5551
5552         /* make sure wptr hasn't changed while processing */
5553         wptr = si_get_ih_wptr(rdev);
5554         if (wptr != rptr)
5555                 goto restart_ih;
5556
5557         return IRQ_HANDLED;
5558 }
5559
5560 /**
5561  * si_copy_dma - copy pages using the DMA engine
5562  *
5563  * @rdev: radeon_device pointer
5564  * @src_offset: src GPU address
5565  * @dst_offset: dst GPU address
5566  * @num_gpu_pages: number of GPU pages to xfer
5567  * @fence: radeon fence object
5568  *
5569  * Copy GPU paging using the DMA engine (SI).
5570  * Used by the radeon ttm implementation to move pages if
5571  * registered as the asic copy callback.
5572  */
5573 int si_copy_dma(struct radeon_device *rdev,
5574                 uint64_t src_offset, uint64_t dst_offset,
5575                 unsigned num_gpu_pages,
5576                 struct radeon_fence **fence)
5577 {
5578         struct radeon_semaphore *sem = NULL;
5579         int ring_index = rdev->asic->copy.dma_ring_index;
5580         struct radeon_ring *ring = &rdev->ring[ring_index];
5581         u32 size_in_bytes, cur_size_in_bytes;
5582         int i, num_loops;
5583         int r = 0;
5584
5585         r = radeon_semaphore_create(rdev, &sem);
5586         if (r) {
5587                 DRM_ERROR("radeon: moving bo (%d).\n", r);
5588                 return r;
5589         }
5590
5591         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
5592         num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
5593         r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
5594         if (r) {
5595                 DRM_ERROR("radeon: moving bo (%d).\n", r);
5596                 radeon_semaphore_free(rdev, &sem, NULL);
5597                 return r;
5598         }
5599
5600         if (radeon_fence_need_sync(*fence, ring->idx)) {
5601                 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
5602                                             ring->idx);
5603                 radeon_fence_note_sync(*fence, ring->idx);
5604         } else {
5605                 radeon_semaphore_free(rdev, &sem, NULL);
5606         }
5607
5608         for (i = 0; i < num_loops; i++) {
5609                 cur_size_in_bytes = size_in_bytes;
5610                 if (cur_size_in_bytes > 0xFFFFF)
5611                         cur_size_in_bytes = 0xFFFFF;
5612                 size_in_bytes -= cur_size_in_bytes;
5613                 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
5614                 radeon_ring_write(ring, dst_offset & 0xffffffff);
5615                 radeon_ring_write(ring, src_offset & 0xffffffff);
5616                 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
5617                 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
5618                 src_offset += cur_size_in_bytes;
5619                 dst_offset += cur_size_in_bytes;
5620         }
5621
5622         r = radeon_fence_emit(rdev, fence, ring->idx);
5623         if (r) {
5624                 radeon_ring_unlock_undo(rdev, ring);
5625                 return r;
5626         }
5627
5628         radeon_ring_unlock_commit(rdev, ring);
5629         radeon_semaphore_free(rdev, &sem, *fence);
5630
5631         return r;
5632 }
5633
5634 /*
5635  * startup/shutdown callbacks
5636  */
/**
 * si_startup - bring up the asic engines
 *
 * @rdev: radeon_device pointer
 *
 * Brings the chip to a working state: loads the microcode, programs
 * the MC and GART, allocates RLC and writeback buffers, starts the
 * fence driver on every ring, initializes interrupts, and sets up
 * and resumes all rings (3 CP rings, 2 DMA rings, and UVD when
 * present) before starting the IB pool and the VM manager.
 * The ordering of these steps is intentional; do not reorder.
 * Returns 0 on success, negative error code on failure.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* fetch any microcode images that have not been loaded yet */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
	    !rdev->rlc_fw || !rdev->mc_fw) {
		r = si_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	r = si_mc_load_microcode(rdev);
	if (r) {
		DRM_ERROR("Failed to load MC firmware!\n");
		return r;
	}

	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	/* program the MC before enabling the GART that lives behind it */
	si_mc_program(rdev);
	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	r = si_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start the fence driver on every ring we are going to use */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD failure is not fatal: just disable the UVD ring */
	if (rdev->has_uvd) {
		r = rv770_uvd_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
		if (r)
			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	/* initialize the ring buffers (GFX, two compute CPs, two DMAs) */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_RB1_RPTR, CP_RB1_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_RB2_RPTR, CP_RB2_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* UVD ring was disabled above (ring_size = 0) if resume failed */
	if (rdev->has_uvd) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		if (ring->ring_size) {
			r = radeon_ring_init(rdev, ring, ring->ring_size,
					     R600_WB_UVD_RPTR_OFFSET,
					     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
					     0, 0xfffff, RADEON_CP_PACKET2);
			if (!r)
				r = r600_uvd_init(rdev);
			if (r)
				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
		}
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	return 0;
}
5817
5818 int si_resume(struct radeon_device *rdev)
5819 {
5820         int r;
5821
5822         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
5823          * posting will perform necessary task to bring back GPU into good
5824          * shape.
5825          */
5826         /* post card */
5827         atom_asic_init(rdev->mode_info.atom_context);
5828
5829         /* init golden registers */
5830         si_init_golden_registers(rdev);
5831
5832         rdev->accel_working = true;
5833         r = si_startup(rdev);
5834         if (r) {
5835                 DRM_ERROR("si startup failed on resume\n");
5836                 rdev->accel_working = false;
5837                 return r;
5838         }
5839
5840         return r;
5841
5842 }
5843
/**
 * si_suspend - suspend the asic
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the running engines roughly in reverse startup order:
 * VM manager, CP, DMA engines, UVD (if present), interrupts,
 * writeback, and finally the GART.  Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		r600_uvd_rbc_stop(rdev);
		radeon_uvd_suspend(rdev);
	}
	/* stop interrupts before disabling writeback; the IH uses wb memory */
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
5858
5859 /* Plan is to move initialization in that function and use
5860  * helper function so that radeon_device_init pretty much
5861  * do nothing more than calling asic specific function. This
5862  * should also allow to remove a bunch of callback function
5863  * like vram_info.
5864  */
5865 int si_init(struct radeon_device *rdev)
5866 {
5867         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5868         int r;
5869
5870         /* Read BIOS */
5871         if (!radeon_get_bios(rdev)) {
5872                 if (ASIC_IS_AVIVO(rdev))
5873                         return -EINVAL;
5874         }
5875         /* Must be an ATOMBIOS */
5876         if (!rdev->is_atom_bios) {
5877                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
5878                 return -EINVAL;
5879         }
5880         r = radeon_atombios_init(rdev);
5881         if (r)
5882                 return r;
5883
5884         /* Post card if necessary */
5885         if (!radeon_card_posted(rdev)) {
5886                 if (!rdev->bios) {
5887                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
5888                         return -EINVAL;
5889                 }
5890                 DRM_INFO("GPU not posted. posting now...\n");
5891                 atom_asic_init(rdev->mode_info.atom_context);
5892         }
5893         /* init golden registers */
5894         si_init_golden_registers(rdev);
5895         /* Initialize scratch registers */
5896         si_scratch_init(rdev);
5897         /* Initialize surface registers */
5898         radeon_surface_init(rdev);
5899         /* Initialize clocks */
5900         radeon_get_clock_info(rdev->ddev);
5901
5902         /* Fence driver */
5903         r = radeon_fence_driver_init(rdev);
5904         if (r)
5905                 return r;
5906
5907         /* initialize memory controller */
5908         r = si_mc_init(rdev);
5909         if (r)
5910                 return r;
5911         /* Memory manager */
5912         r = radeon_bo_init(rdev);
5913         if (r)
5914                 return r;
5915
5916         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5917         ring->ring_obj = NULL;
5918         r600_ring_init(rdev, ring, 1024 * 1024);
5919
5920         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5921         ring->ring_obj = NULL;
5922         r600_ring_init(rdev, ring, 1024 * 1024);
5923
5924         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5925         ring->ring_obj = NULL;
5926         r600_ring_init(rdev, ring, 1024 * 1024);
5927
5928         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
5929         ring->ring_obj = NULL;
5930         r600_ring_init(rdev, ring, 64 * 1024);
5931
5932         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
5933         ring->ring_obj = NULL;
5934         r600_ring_init(rdev, ring, 64 * 1024);
5935
5936         if (rdev->has_uvd) {
5937                 r = radeon_uvd_init(rdev);
5938                 if (!r) {
5939                         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
5940                         ring->ring_obj = NULL;
5941                         r600_ring_init(rdev, ring, 4096);
5942                 }
5943         }
5944
5945         rdev->ih.ring_obj = NULL;
5946         r600_ih_ring_init(rdev, 64 * 1024);
5947
5948         r = r600_pcie_gart_init(rdev);
5949         if (r)
5950                 return r;
5951
5952         rdev->accel_working = true;
5953         r = si_startup(rdev);
5954         if (r) {
5955                 dev_err(rdev->dev, "disabling GPU acceleration\n");
5956                 si_cp_fini(rdev);
5957                 cayman_dma_fini(rdev);
5958                 si_irq_fini(rdev);
5959                 si_rlc_fini(rdev);
5960                 radeon_wb_fini(rdev);
5961                 radeon_ib_pool_fini(rdev);
5962                 radeon_vm_manager_fini(rdev);
5963                 radeon_irq_kms_fini(rdev);
5964                 si_pcie_gart_fini(rdev);
5965                 rdev->accel_working = false;
5966         }
5967
5968         /* Don't start up if the MC ucode is missing.
5969          * The default clocks and voltages before the MC ucode
5970          * is loaded are not suffient for advanced operations.
5971          */
5972         if (!rdev->mc_fw) {
5973                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
5974                 return -EINVAL;
5975         }
5976
5977         return 0;
5978 }
5979
/**
 * si_fini - asic specific driver and hw fini
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the asic roughly in reverse order of si_init()/
 * si_startup() and frees the cached vbios copy.
 */
void si_fini(struct radeon_device *rdev)
{
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_irq_fini(rdev);
	si_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd)
		radeon_uvd_fini(rdev);
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* drop the vbios copy allocated by radeon_get_bios() */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
6001
6002 /**
6003  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6004  *
6005  * @rdev: radeon_device pointer
6006  *
6007  * Fetches a GPU clock counter snapshot (SI).
6008  * Returns the 64 bit clock counter snapshot.
6009  */
6010 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6011 {
6012         uint64_t clock;
6013
6014         mutex_lock(&rdev->gpu_clock_mutex);
6015         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6016         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6017                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6018         mutex_unlock(&rdev->gpu_clock_mutex);
6019         return clock;
6020 }
6021
/**
 * si_set_uvd_clocks - program the UVD PLL (VCLK/DCLK)
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock (0 to power down the PLL)
 * @dclk: requested UVD decoder clock (0 to power down the PLL)
 *
 * Bypasses the UVD clocks with bclk, reprograms the UPLL dividers
 * for the requested clocks, then switches back to normal mode.
 * If either clock is zero, the PLL is left in bypass and put to
 * sleep.  The register write sequence is order-sensitive.
 * Returns 0 on success, negative error code on failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* compute fb/post dividers for the requested clocks
	 * (VCO range 125000-250000, fb range 16384-0x03FFFFFF,
	 * post divider range 0-128, step 5)
	 */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	/* handshake with the SMC before reprogramming the PLL */
	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
6112
/**
 * si_pcie_gen3_enable - try to raise the PCIE link speed to gen2/gen3
 *
 * @rdev: radeon_device pointer
 *
 * Checks the speeds supported by the platform and, if a higher rate
 * than the current one is available, retrains the link - including
 * the gen3 equalization retry sequence - and requests the highest
 * common speed.  Silently returns on IGP parts, non-PCIE devices,
 * when disabled via radeon.pcie_gen2=0, or when no faster speed is
 * supported.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* both the bridge and the GPU must expose a PCIE capability */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the link control state of both ends */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			/* force hardware autonomous width disable on both ends */
			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate up to the maximum detected link width */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* retry the equalization sequence up to 10 times */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link and redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl: restore the saved HAWD bits on both ends */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2: restore the saved compliance-related bits
				 * (NOTE(review): (1 << 4) | (7 << 9) look like the
				 * enter-compliance and compliance de-emphasis fields -
				 * confirm against PCI_EXP_LNKCTL2 definitions)
				 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* request the target link speed in the GPU's LNKCTL2 */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hardware to acknowledge the speed change */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
6269
6270 static void si_program_aspm(struct radeon_device *rdev)
6271 {
6272         u32 data, orig;
6273         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
6274         bool disable_clkreq = false;
6275
6276         if (!(rdev->flags & RADEON_IS_PCIE))
6277                 return;
6278
6279         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
6280         data &= ~LC_XMIT_N_FTS_MASK;
6281         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
6282         if (orig != data)
6283                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
6284
6285         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
6286         data |= LC_GO_TO_RECOVERY;
6287         if (orig != data)
6288                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
6289
6290         orig = data = RREG32_PCIE(PCIE_P_CNTL);
6291         data |= P_IGNORE_EDB_ERR;
6292         if (orig != data)
6293                 WREG32_PCIE(PCIE_P_CNTL, data);
6294
6295         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
6296         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
6297         data |= LC_PMI_TO_L1_DIS;
6298         if (!disable_l0s)
6299                 data |= LC_L0S_INACTIVITY(7);
6300
6301         if (!disable_l1) {
6302                 data |= LC_L1_INACTIVITY(7);
6303                 data &= ~LC_PMI_TO_L1_DIS;
6304                 if (orig != data)
6305                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
6306
6307                 if (!disable_plloff_in_l1) {
6308                         bool clk_req_support;
6309
6310                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
6311                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
6312                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
6313                         if (orig != data)
6314                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
6315
6316                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
6317                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
6318                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
6319                         if (orig != data)
6320                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
6321
6322                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
6323                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
6324                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
6325                         if (orig != data)
6326                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
6327
6328                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
6329                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
6330                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
6331                         if (orig != data)
6332                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
6333
6334                         if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
6335                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
6336                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
6337                                 if (orig != data)
6338                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
6339
6340                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
6341                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
6342                                 if (orig != data)
6343                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
6344
6345                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
6346                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
6347                                 if (orig != data)
6348                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
6349
6350                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
6351                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
6352                                 if (orig != data)
6353                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
6354
6355                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
6356                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
6357                                 if (orig != data)
6358                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
6359
6360                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
6361                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
6362                                 if (orig != data)
6363                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
6364
6365                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
6366                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
6367                                 if (orig != data)
6368                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
6369
6370                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
6371                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
6372                                 if (orig != data)
6373                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
6374                         }
6375                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
6376                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
6377                         data |= LC_DYN_LANES_PWR_STATE(3);
6378                         if (orig != data)
6379                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
6380
6381                         orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
6382                         data &= ~LS2_EXIT_TIME_MASK;
6383                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
6384                                 data |= LS2_EXIT_TIME(5);
6385                         if (orig != data)
6386                                 WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
6387
6388                         orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
6389                         data &= ~LS2_EXIT_TIME_MASK;
6390                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
6391                                 data |= LS2_EXIT_TIME(5);
6392                         if (orig != data)
6393                                 WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
6394
6395                         if (!disable_clkreq) {
6396                                 struct pci_dev *root = rdev->pdev->bus->self;
6397                                 u32 lnkcap;
6398
6399                                 clk_req_support = false;
6400                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
6401                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
6402                                         clk_req_support = true;
6403                         } else {
6404                                 clk_req_support = false;
6405                         }
6406
6407                         if (clk_req_support) {
6408                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
6409                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
6410                                 if (orig != data)
6411                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
6412
6413                                 orig = data = RREG32(THM_CLK_CNTL);
6414                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
6415                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
6416                                 if (orig != data)
6417                                         WREG32(THM_CLK_CNTL, data);
6418
6419                                 orig = data = RREG32(MISC_CLK_CNTL);
6420                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
6421                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
6422                                 if (orig != data)
6423                                         WREG32(MISC_CLK_CNTL, data);
6424
6425                                 orig = data = RREG32(CG_CLKPIN_CNTL);
6426                                 data &= ~BCLK_AS_XCLK;
6427                                 if (orig != data)
6428                                         WREG32(CG_CLKPIN_CNTL, data);
6429
6430                                 orig = data = RREG32(CG_CLKPIN_CNTL_2);
6431                                 data &= ~FORCE_BIF_REFCLK_EN;
6432                                 if (orig != data)
6433                                         WREG32(CG_CLKPIN_CNTL_2, data);
6434
6435                                 orig = data = RREG32(MPLL_BYPASSCLK_SEL);
6436                                 data &= ~MPLL_CLKOUT_SEL_MASK;
6437                                 data |= MPLL_CLKOUT_SEL(4);
6438                                 if (orig != data)
6439                                         WREG32(MPLL_BYPASSCLK_SEL, data);
6440
6441                                 orig = data = RREG32(SPLL_CNTL_MODE);
6442                                 data &= ~SPLL_REFCLK_SEL_MASK;
6443                                 if (orig != data)
6444                                         WREG32(SPLL_CNTL_MODE, data);
6445                         }
6446                 }
6447         } else {
6448                 if (orig != data)
6449                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
6450         }
6451
6452         orig = data = RREG32_PCIE(PCIE_CNTL2);
6453         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
6454         if (orig != data)
6455                 WREG32_PCIE(PCIE_CNTL2, data);
6456
6457         if (!disable_l0s) {
6458                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
6459                 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
6460                         data = RREG32_PCIE(PCIE_LC_STATUS1);
6461                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
6462                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
6463                                 data &= ~LC_L0S_INACTIVITY_MASK;
6464                                 if (orig != data)
6465                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
6466                         }
6467                 }
6468         }
6469 }