drm/radeon: update radeon_atom_get_voltage_table() for SI
[linux-2.6-block.git] / drivers / gpu / drm / radeon / si.c
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/platform_device.h>
26 #include <linux/slab.h>
27 #include <linux/module.h>
28 #include <drm/drmP.h>
29 #include "radeon.h"
30 #include "radeon_asic.h"
31 #include <drm/radeon_drm.h>
32 #include "sid.h"
33 #include "atom.h"
34 #include "si_blit_shaders.h"
35
/* Sizes of the SI-family microcode images validated/loaded by this file.
 * NOTE(review): presumably dword counts matching the shipped .bin firmware
 * blobs requested below — confirm against the firmware loading code. */
36 #define SI_PFP_UCODE_SIZE 2144
37 #define SI_PM4_UCODE_SIZE 2144
38 #define SI_CE_UCODE_SIZE 2144
39 #define SI_RLC_UCODE_SIZE 2048
40 #define SI_MC_UCODE_SIZE 7769
41 #define OLAND_MC_UCODE_SIZE 7863
42
/* Declare the external firmware files needed per SI ASIC (Tahiti, Pitcairn,
 * Verde, Oland, Hainan): PFP/ME/CE command processors, memory controller (mc)
 * and RLC microcode. These tags let userspace tooling bundle the blobs. */
43 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
44 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
45 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
46 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
47 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
48 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
49 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
50 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
51 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
52 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
53 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
54 MODULE_FIRMWARE("radeon/VERDE_me.bin");
55 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
56 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
57 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
58 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
59 MODULE_FIRMWARE("radeon/OLAND_me.bin");
60 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
61 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
62 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
63 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
64 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
65 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
66 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
67 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
68
/* Cross-file helpers reused by the SI code. NOTE(review): the r600_/evergreen_
 * prefixes suggest these live in r600.c and evergreen.c — ideally these would
 * come from a shared header rather than local extern declarations. */
69 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
70 extern void r600_ih_ring_fini(struct radeon_device *rdev);
71 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
72 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
73 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
74 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
75 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
76 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
77
/* Tahiti RLC "golden" register table: three u32s per entry — presumably
 * { reg offset, mask, value } applied at ASIC init; confirm against the
 * register-sequence programming helper that consumes these tables. */
78 static const u32 tahiti_golden_rlc_registers[] =
79 {
80         0xc424, 0xffffffff, 0x00601005,
81         0xc47c, 0xffffffff, 0x10104040,
82         0xc488, 0xffffffff, 0x0100000a,
83         0xc314, 0xffffffff, 0x00000800,
84         0xc30c, 0xffffffff, 0x800000f4,
85         0xf4a8, 0xffffffff, 0x00000000
86 };
87
/* Tahiti general "golden" register table: { offset, mask, value } triples
 * (hardware-validated magic values; do not edit by hand). */
88 static const u32 tahiti_golden_registers[] =
89 {
90         0x9a10, 0x00010000, 0x00018208,
91         0x9830, 0xffffffff, 0x00000000,
92         0x9834, 0xf00fffff, 0x00000400,
93         0x9838, 0x0002021c, 0x00020200,
94         0xc78, 0x00000080, 0x00000000,
95         0xd030, 0x000300c0, 0x00800040,
96         0xd830, 0x000300c0, 0x00800040,
97         0x5bb0, 0x000000f0, 0x00000070,
98         0x5bc0, 0x00200000, 0x50100000,
99         0x7030, 0x31000311, 0x00000011,
100         0x277c, 0x00000003, 0x000007ff,
101         0x240c, 0x000007ff, 0x00000000,
102         0x8a14, 0xf000001f, 0x00000007,
103         0x8b24, 0xffffffff, 0x00ffffff,
104         0x8b10, 0x0000ff0f, 0x00000000,
105         0x28a4c, 0x07ffffff, 0x4e000000,
106         0x28350, 0x3f3f3fff, 0x2a00126a,
107         0x30, 0x000000ff, 0x0040,
108         0x34, 0x00000040, 0x00004040,
109         0x9100, 0x07ffffff, 0x03000000,
110         0x8e88, 0x01ff1f3f, 0x00000000,
111         0x8e84, 0x01ff1f3f, 0x00000000,
112         0x9060, 0x0000007f, 0x00000020,
113         0x9508, 0x00010000, 0x00010000,
114         0xac14, 0x00000200, 0x000002fb,
115         0xac10, 0xffffffff, 0x0000543b,
116         0xac0c, 0xffffffff, 0xa9210876,
117         0x88d0, 0xffffffff, 0x000fff40,
118         0x88d4, 0x0000001f, 0x00000010,
119         0x1410, 0x20000000, 0x20fffed8,
120         0x15c0, 0x000c0fc0, 0x000c0400
121 };
122
/* Second Tahiti golden register table ({ offset, mask, value });
 * kept separate from the main table — presumably applied in a distinct
 * init phase, confirm at the call site. */
123 static const u32 tahiti_golden_registers2[] =
124 {
125         0xc64, 0x00000001, 0x00000001
126 };
127
/* Pitcairn RLC golden register table ({ offset, mask, value } triples). */
128 static const u32 pitcairn_golden_rlc_registers[] =
129 {
130         0xc424, 0xffffffff, 0x00601004,
131         0xc47c, 0xffffffff, 0x10102020,
132         0xc488, 0xffffffff, 0x01000020,
133         0xc314, 0xffffffff, 0x00000800,
134         0xc30c, 0xffffffff, 0x800000a4
135 };
136
/* Pitcairn general golden register table ({ offset, mask, value } triples). */
137 static const u32 pitcairn_golden_registers[] =
138 {
139         0x9a10, 0x00010000, 0x00018208,
140         0x9830, 0xffffffff, 0x00000000,
141         0x9834, 0xf00fffff, 0x00000400,
142         0x9838, 0x0002021c, 0x00020200,
143         0xc78, 0x00000080, 0x00000000,
144         0xd030, 0x000300c0, 0x00800040,
145         0xd830, 0x000300c0, 0x00800040,
146         0x5bb0, 0x000000f0, 0x00000070,
147         0x5bc0, 0x00200000, 0x50100000,
148         0x7030, 0x31000311, 0x00000011,
149         0x2ae4, 0x00073ffe, 0x000022a2,
150         0x240c, 0x000007ff, 0x00000000,
151         0x8a14, 0xf000001f, 0x00000007,
152         0x8b24, 0xffffffff, 0x00ffffff,
153         0x8b10, 0x0000ff0f, 0x00000000,
154         0x28a4c, 0x07ffffff, 0x4e000000,
155         0x28350, 0x3f3f3fff, 0x2a00126a,
156         0x30, 0x000000ff, 0x0040,
157         0x34, 0x00000040, 0x00004040,
158         0x9100, 0x07ffffff, 0x03000000,
159         0x9060, 0x0000007f, 0x00000020,
160         0x9508, 0x00010000, 0x00010000,
161         0xac14, 0x000003ff, 0x000000f7,
162         0xac10, 0xffffffff, 0x00000000,
163         0xac0c, 0xffffffff, 0x32761054,
164         0x88d4, 0x0000001f, 0x00000010,
165         0x15c0, 0x000c0fc0, 0x000c0400
166 };
167
/* Verde RLC golden register table ({ offset, mask, value } triples). */
168 static const u32 verde_golden_rlc_registers[] =
169 {
170         0xc424, 0xffffffff, 0x033f1005,
171         0xc47c, 0xffffffff, 0x10808020,
172         0xc488, 0xffffffff, 0x00800008,
173         0xc314, 0xffffffff, 0x00001000,
174         0xc30c, 0xffffffff, 0x80010014
175 };
176
/* Verde general golden register table ({ offset, mask, value } triples).
 * NOTE(review): many entries are repeated two or three times verbatim
 * (0xd030, 0x2ae4, 0x8a14, ...). Re-writing the same masked value is
 * harmless but looks like copy/paste bloat from the hardware team's
 * per-instance lists — confirm before deduplicating. */
177 static const u32 verde_golden_registers[] =
178 {
179         0x9a10, 0x00010000, 0x00018208,
180         0x9830, 0xffffffff, 0x00000000,
181         0x9834, 0xf00fffff, 0x00000400,
182         0x9838, 0x0002021c, 0x00020200,
183         0xc78, 0x00000080, 0x00000000,
184         0xd030, 0x000300c0, 0x00800040,
185         0xd030, 0x000300c0, 0x00800040,
186         0xd830, 0x000300c0, 0x00800040,
187         0xd830, 0x000300c0, 0x00800040,
188         0x5bb0, 0x000000f0, 0x00000070,
189         0x5bc0, 0x00200000, 0x50100000,
190         0x7030, 0x31000311, 0x00000011,
191         0x2ae4, 0x00073ffe, 0x000022a2,
192         0x2ae4, 0x00073ffe, 0x000022a2,
193         0x2ae4, 0x00073ffe, 0x000022a2,
194         0x240c, 0x000007ff, 0x00000000,
195         0x240c, 0x000007ff, 0x00000000,
196         0x240c, 0x000007ff, 0x00000000,
197         0x8a14, 0xf000001f, 0x00000007,
198         0x8a14, 0xf000001f, 0x00000007,
199         0x8a14, 0xf000001f, 0x00000007,
200         0x8b24, 0xffffffff, 0x00ffffff,
201         0x8b10, 0x0000ff0f, 0x00000000,
202         0x28a4c, 0x07ffffff, 0x4e000000,
203         0x28350, 0x3f3f3fff, 0x0000124a,
204         0x28350, 0x3f3f3fff, 0x0000124a,
205         0x28350, 0x3f3f3fff, 0x0000124a,
206         0x30, 0x000000ff, 0x0040,
207         0x34, 0x00000040, 0x00004040,
208         0x9100, 0x07ffffff, 0x03000000,
209         0x9100, 0x07ffffff, 0x03000000,
210         0x8e88, 0x01ff1f3f, 0x00000000,
211         0x8e88, 0x01ff1f3f, 0x00000000,
212         0x8e88, 0x01ff1f3f, 0x00000000,
213         0x8e84, 0x01ff1f3f, 0x00000000,
214         0x8e84, 0x01ff1f3f, 0x00000000,
215         0x8e84, 0x01ff1f3f, 0x00000000,
216         0x9060, 0x0000007f, 0x00000020,
217         0x9508, 0x00010000, 0x00010000,
218         0xac14, 0x000003ff, 0x00000003,
219         0xac14, 0x000003ff, 0x00000003,
220         0xac14, 0x000003ff, 0x00000003,
221         0xac10, 0xffffffff, 0x00000000,
222         0xac10, 0xffffffff, 0x00000000,
223         0xac10, 0xffffffff, 0x00000000,
224         0xac0c, 0xffffffff, 0x00001032,
225         0xac0c, 0xffffffff, 0x00001032,
226         0xac0c, 0xffffffff, 0x00001032,
227         0x88d4, 0x0000001f, 0x00000010,
228         0x88d4, 0x0000001f, 0x00000010,
229         0x88d4, 0x0000001f, 0x00000010,
230         0x15c0, 0x000c0fc0, 0x000c0400
231 };
232
/* Oland RLC golden register table ({ offset, mask, value } triples). */
233 static const u32 oland_golden_rlc_registers[] =
234 {
235         0xc424, 0xffffffff, 0x00601005,
236         0xc47c, 0xffffffff, 0x10104040,
237         0xc488, 0xffffffff, 0x0100000a,
238         0xc314, 0xffffffff, 0x00000800,
239         0xc30c, 0xffffffff, 0x800000f4
240 };
241
/* Oland general golden register table ({ offset, mask, value } triples). */
242 static const u32 oland_golden_registers[] =
243 {
244         0x9a10, 0x00010000, 0x00018208,
245         0x9830, 0xffffffff, 0x00000000,
246         0x9834, 0xf00fffff, 0x00000400,
247         0x9838, 0x0002021c, 0x00020200,
248         0xc78, 0x00000080, 0x00000000,
249         0xd030, 0x000300c0, 0x00800040,
250         0xd830, 0x000300c0, 0x00800040,
251         0x5bb0, 0x000000f0, 0x00000070,
252         0x5bc0, 0x00200000, 0x50100000,
253         0x7030, 0x31000311, 0x00000011,
254         0x2ae4, 0x00073ffe, 0x000022a2,
255         0x240c, 0x000007ff, 0x00000000,
256         0x8a14, 0xf000001f, 0x00000007,
257         0x8b24, 0xffffffff, 0x00ffffff,
258         0x8b10, 0x0000ff0f, 0x00000000,
259         0x28a4c, 0x07ffffff, 0x4e000000,
260         0x28350, 0x3f3f3fff, 0x00000082,
261         0x30, 0x000000ff, 0x0040,
262         0x34, 0x00000040, 0x00004040,
263         0x9100, 0x07ffffff, 0x03000000,
264         0x9060, 0x0000007f, 0x00000020,
265         0x9508, 0x00010000, 0x00010000,
266         0xac14, 0x000003ff, 0x000000f3,
267         0xac10, 0xffffffff, 0x00000000,
268         0xac0c, 0xffffffff, 0x00003210,
269         0x88d4, 0x0000001f, 0x00000010,
270         0x15c0, 0x000c0fc0, 0x000c0400
271 };
272
/* Hainan general golden register table ({ offset, mask, value } triples).
 * NOTE(review): no RLC companion table for Hainan in this chunk — confirm
 * the init path handles that. */
273 static const u32 hainan_golden_registers[] =
274 {
275         0x9a10, 0x00010000, 0x00018208,
276         0x9830, 0xffffffff, 0x00000000,
277         0x9834, 0xf00fffff, 0x00000400,
278         0x9838, 0x0002021c, 0x00020200,
279         0xd0c0, 0xff000fff, 0x00000100,
280         0xd030, 0x000300c0, 0x00800040,
281         0xd8c0, 0xff000fff, 0x00000100,
282         0xd830, 0x000300c0, 0x00800040,
283         0x2ae4, 0x00073ffe, 0x000022a2,
284         0x240c, 0x000007ff, 0x00000000,
285         0x8a14, 0xf000001f, 0x00000007,
286         0x8b24, 0xffffffff, 0x00ffffff,
287         0x8b10, 0x0000ff0f, 0x00000000,
288         0x28a4c, 0x07ffffff, 0x4e000000,
289         0x28350, 0x3f3f3fff, 0x00000000,
290         0x30, 0x000000ff, 0x0040,
291         0x34, 0x00000040, 0x00004040,
292         0x9100, 0x03e00000, 0x03600000,
293         0x9060, 0x0000007f, 0x00000020,
294         0x9508, 0x00010000, 0x00010000,
295         0xac14, 0x000003ff, 0x000000f1,
296         0xac10, 0xffffffff, 0x00000000,
297         0xac0c, 0xffffffff, 0x00003210,
298         0x88d4, 0x0000001f, 0x00000010,
299         0x15c0, 0x000c0fc0, 0x000c0400
300 };
301
/* Second Hainan golden register table ({ offset, mask, value });
 * presumably applied in a separate init phase like tahiti_golden_registers2. */
302 static const u32 hainan_golden_registers2[] =
303 {
304         0x98f8, 0xffffffff, 0x02010001
305 };
306
/* Tahiti medium-grain / coarse-grain clock-gating (MGCG/CGCG) init sequence:
 * { offset, mask, value } triples. Hardware-validated magic values; the long
 * 0x9160-0x929c run looks like a per-unit gating-delay table — do not edit
 * entries individually. */
307 static const u32 tahiti_mgcg_cgcg_init[] =
308 {
309         0xc400, 0xffffffff, 0xfffffffc,
310         0x802c, 0xffffffff, 0xe0000000,
311         0x9a60, 0xffffffff, 0x00000100,
312         0x92a4, 0xffffffff, 0x00000100,
313         0xc164, 0xffffffff, 0x00000100,
314         0x9774, 0xffffffff, 0x00000100,
315         0x8984, 0xffffffff, 0x06000100,
316         0x8a18, 0xffffffff, 0x00000100,
317         0x92a0, 0xffffffff, 0x00000100,
318         0xc380, 0xffffffff, 0x00000100,
319         0x8b28, 0xffffffff, 0x00000100,
320         0x9144, 0xffffffff, 0x00000100,
321         0x8d88, 0xffffffff, 0x00000100,
322         0x8d8c, 0xffffffff, 0x00000100,
323         0x9030, 0xffffffff, 0x00000100,
324         0x9034, 0xffffffff, 0x00000100,
325         0x9038, 0xffffffff, 0x00000100,
326         0x903c, 0xffffffff, 0x00000100,
327         0xad80, 0xffffffff, 0x00000100,
328         0xac54, 0xffffffff, 0x00000100,
329         0x897c, 0xffffffff, 0x06000100,
330         0x9868, 0xffffffff, 0x00000100,
331         0x9510, 0xffffffff, 0x00000100,
332         0xaf04, 0xffffffff, 0x00000100,
333         0xae04, 0xffffffff, 0x00000100,
334         0x949c, 0xffffffff, 0x00000100,
335         0x802c, 0xffffffff, 0xe0000000,
336         0x9160, 0xffffffff, 0x00010000,
337         0x9164, 0xffffffff, 0x00030002,
338         0x9168, 0xffffffff, 0x00040007,
339         0x916c, 0xffffffff, 0x00060005,
340         0x9170, 0xffffffff, 0x00090008,
341         0x9174, 0xffffffff, 0x00020001,
342         0x9178, 0xffffffff, 0x00040003,
343         0x917c, 0xffffffff, 0x00000007,
344         0x9180, 0xffffffff, 0x00060005,
345         0x9184, 0xffffffff, 0x00090008,
346         0x9188, 0xffffffff, 0x00030002,
347         0x918c, 0xffffffff, 0x00050004,
348         0x9190, 0xffffffff, 0x00000008,
349         0x9194, 0xffffffff, 0x00070006,
350         0x9198, 0xffffffff, 0x000a0009,
351         0x919c, 0xffffffff, 0x00040003,
352         0x91a0, 0xffffffff, 0x00060005,
353         0x91a4, 0xffffffff, 0x00000009,
354         0x91a8, 0xffffffff, 0x00080007,
355         0x91ac, 0xffffffff, 0x000b000a,
356         0x91b0, 0xffffffff, 0x00050004,
357         0x91b4, 0xffffffff, 0x00070006,
358         0x91b8, 0xffffffff, 0x0008000b,
359         0x91bc, 0xffffffff, 0x000a0009,
360         0x91c0, 0xffffffff, 0x000d000c,
361         0x91c4, 0xffffffff, 0x00060005,
362         0x91c8, 0xffffffff, 0x00080007,
363         0x91cc, 0xffffffff, 0x0000000b,
364         0x91d0, 0xffffffff, 0x000a0009,
365         0x91d4, 0xffffffff, 0x000d000c,
366         0x91d8, 0xffffffff, 0x00070006,
367         0x91dc, 0xffffffff, 0x00090008,
368         0x91e0, 0xffffffff, 0x0000000c,
369         0x91e4, 0xffffffff, 0x000b000a,
370         0x91e8, 0xffffffff, 0x000e000d,
371         0x91ec, 0xffffffff, 0x00080007,
372         0x91f0, 0xffffffff, 0x000a0009,
373         0x91f4, 0xffffffff, 0x0000000d,
374         0x91f8, 0xffffffff, 0x000c000b,
375         0x91fc, 0xffffffff, 0x000f000e,
376         0x9200, 0xffffffff, 0x00090008,
377         0x9204, 0xffffffff, 0x000b000a,
378         0x9208, 0xffffffff, 0x000c000f,
379         0x920c, 0xffffffff, 0x000e000d,
380         0x9210, 0xffffffff, 0x00110010,
381         0x9214, 0xffffffff, 0x000a0009,
382         0x9218, 0xffffffff, 0x000c000b,
383         0x921c, 0xffffffff, 0x0000000f,
384         0x9220, 0xffffffff, 0x000e000d,
385         0x9224, 0xffffffff, 0x00110010,
386         0x9228, 0xffffffff, 0x000b000a,
387         0x922c, 0xffffffff, 0x000d000c,
388         0x9230, 0xffffffff, 0x00000010,
389         0x9234, 0xffffffff, 0x000f000e,
390         0x9238, 0xffffffff, 0x00120011,
391         0x923c, 0xffffffff, 0x000c000b,
392         0x9240, 0xffffffff, 0x000e000d,
393         0x9244, 0xffffffff, 0x00000011,
394         0x9248, 0xffffffff, 0x0010000f,
395         0x924c, 0xffffffff, 0x00130012,
396         0x9250, 0xffffffff, 0x000d000c,
397         0x9254, 0xffffffff, 0x000f000e,
398         0x9258, 0xffffffff, 0x00100013,
399         0x925c, 0xffffffff, 0x00120011,
400         0x9260, 0xffffffff, 0x00150014,
401         0x9264, 0xffffffff, 0x000e000d,
402         0x9268, 0xffffffff, 0x0010000f,
403         0x926c, 0xffffffff, 0x00000013,
404         0x9270, 0xffffffff, 0x00120011,
405         0x9274, 0xffffffff, 0x00150014,
406         0x9278, 0xffffffff, 0x000f000e,
407         0x927c, 0xffffffff, 0x00110010,
408         0x9280, 0xffffffff, 0x00000014,
409         0x9284, 0xffffffff, 0x00130012,
410         0x9288, 0xffffffff, 0x00160015,
411         0x928c, 0xffffffff, 0x0010000f,
412         0x9290, 0xffffffff, 0x00120011,
413         0x9294, 0xffffffff, 0x00000015,
414         0x9298, 0xffffffff, 0x00140013,
415         0x929c, 0xffffffff, 0x00170016,
416         0x9150, 0xffffffff, 0x96940200,
417         0x8708, 0xffffffff, 0x00900100,
418         0xc478, 0xffffffff, 0x00000080,
419         0xc404, 0xffffffff, 0x0020003f,
420         0x30, 0xffffffff, 0x0000001c,
421         0x34, 0x000f0000, 0x000f0000,
422         0x160c, 0xffffffff, 0x00000100,
423         0x1024, 0xffffffff, 0x00000100,
424         0x102c, 0x00000101, 0x00000000,
425         0x20a8, 0xffffffff, 0x00000104,
426         0x264c, 0x000c0000, 0x000c0000,
427         0x2648, 0x000c0000, 0x000c0000,
428         0x55e4, 0xff000fff, 0x00000100,
429         0x55e8, 0x00000001, 0x00000001,
430         0x2f50, 0x00000001, 0x00000001,
431         0x30cc, 0xc0000fff, 0x00000104,
432         0xc1e4, 0x00000001, 0x00000001,
433         0xd0c0, 0xfffffff0, 0x00000100,
434         0xd8c0, 0xfffffff0, 0x00000100
435 };
436
/* Pitcairn MGCG/CGCG clock-gating init sequence ({ offset, mask, value }
 * triples) — same structure as the Tahiti table but with a shorter
 * 0x9160-0x9260 run (fewer units on this ASIC, presumably). */
437 static const u32 pitcairn_mgcg_cgcg_init[] =
438 {
439         0xc400, 0xffffffff, 0xfffffffc,
440         0x802c, 0xffffffff, 0xe0000000,
441         0x9a60, 0xffffffff, 0x00000100,
442         0x92a4, 0xffffffff, 0x00000100,
443         0xc164, 0xffffffff, 0x00000100,
444         0x9774, 0xffffffff, 0x00000100,
445         0x8984, 0xffffffff, 0x06000100,
446         0x8a18, 0xffffffff, 0x00000100,
447         0x92a0, 0xffffffff, 0x00000100,
448         0xc380, 0xffffffff, 0x00000100,
449         0x8b28, 0xffffffff, 0x00000100,
450         0x9144, 0xffffffff, 0x00000100,
451         0x8d88, 0xffffffff, 0x00000100,
452         0x8d8c, 0xffffffff, 0x00000100,
453         0x9030, 0xffffffff, 0x00000100,
454         0x9034, 0xffffffff, 0x00000100,
455         0x9038, 0xffffffff, 0x00000100,
456         0x903c, 0xffffffff, 0x00000100,
457         0xad80, 0xffffffff, 0x00000100,
458         0xac54, 0xffffffff, 0x00000100,
459         0x897c, 0xffffffff, 0x06000100,
460         0x9868, 0xffffffff, 0x00000100,
461         0x9510, 0xffffffff, 0x00000100,
462         0xaf04, 0xffffffff, 0x00000100,
463         0xae04, 0xffffffff, 0x00000100,
464         0x949c, 0xffffffff, 0x00000100,
465         0x802c, 0xffffffff, 0xe0000000,
466         0x9160, 0xffffffff, 0x00010000,
467         0x9164, 0xffffffff, 0x00030002,
468         0x9168, 0xffffffff, 0x00040007,
469         0x916c, 0xffffffff, 0x00060005,
470         0x9170, 0xffffffff, 0x00090008,
471         0x9174, 0xffffffff, 0x00020001,
472         0x9178, 0xffffffff, 0x00040003,
473         0x917c, 0xffffffff, 0x00000007,
474         0x9180, 0xffffffff, 0x00060005,
475         0x9184, 0xffffffff, 0x00090008,
476         0x9188, 0xffffffff, 0x00030002,
477         0x918c, 0xffffffff, 0x00050004,
478         0x9190, 0xffffffff, 0x00000008,
479         0x9194, 0xffffffff, 0x00070006,
480         0x9198, 0xffffffff, 0x000a0009,
481         0x919c, 0xffffffff, 0x00040003,
482         0x91a0, 0xffffffff, 0x00060005,
483         0x91a4, 0xffffffff, 0x00000009,
484         0x91a8, 0xffffffff, 0x00080007,
485         0x91ac, 0xffffffff, 0x000b000a,
486         0x91b0, 0xffffffff, 0x00050004,
487         0x91b4, 0xffffffff, 0x00070006,
488         0x91b8, 0xffffffff, 0x0008000b,
489         0x91bc, 0xffffffff, 0x000a0009,
490         0x91c0, 0xffffffff, 0x000d000c,
491         0x9200, 0xffffffff, 0x00090008,
492         0x9204, 0xffffffff, 0x000b000a,
493         0x9208, 0xffffffff, 0x000c000f,
494         0x920c, 0xffffffff, 0x000e000d,
495         0x9210, 0xffffffff, 0x00110010,
496         0x9214, 0xffffffff, 0x000a0009,
497         0x9218, 0xffffffff, 0x000c000b,
498         0x921c, 0xffffffff, 0x0000000f,
499         0x9220, 0xffffffff, 0x000e000d,
500         0x9224, 0xffffffff, 0x00110010,
501         0x9228, 0xffffffff, 0x000b000a,
502         0x922c, 0xffffffff, 0x000d000c,
503         0x9230, 0xffffffff, 0x00000010,
504         0x9234, 0xffffffff, 0x000f000e,
505         0x9238, 0xffffffff, 0x00120011,
506         0x923c, 0xffffffff, 0x000c000b,
507         0x9240, 0xffffffff, 0x000e000d,
508         0x9244, 0xffffffff, 0x00000011,
509         0x9248, 0xffffffff, 0x0010000f,
510         0x924c, 0xffffffff, 0x00130012,
511         0x9250, 0xffffffff, 0x000d000c,
512         0x9254, 0xffffffff, 0x000f000e,
513         0x9258, 0xffffffff, 0x00100013,
514         0x925c, 0xffffffff, 0x00120011,
515         0x9260, 0xffffffff, 0x00150014,
516         0x9150, 0xffffffff, 0x96940200,
517         0x8708, 0xffffffff, 0x00900100,
518         0xc478, 0xffffffff, 0x00000080,
519         0xc404, 0xffffffff, 0x0020003f,
520         0x30, 0xffffffff, 0x0000001c,
521         0x34, 0x000f0000, 0x000f0000,
522         0x160c, 0xffffffff, 0x00000100,
523         0x1024, 0xffffffff, 0x00000100,
524         0x102c, 0x00000101, 0x00000000,
525         0x20a8, 0xffffffff, 0x00000104,
526         0x55e4, 0xff000fff, 0x00000100,
527         0x55e8, 0x00000001, 0x00000001,
528         0x2f50, 0x00000001, 0x00000001,
529         0x30cc, 0xc0000fff, 0x00000104,
530         0xc1e4, 0x00000001, 0x00000001,
531         0xd0c0, 0xfffffff0, 0x00000100,
532         0xd8c0, 0xfffffff0, 0x00000100
533 };
534
/* Verde MGCG/CGCG clock-gating init sequence ({ offset, mask, value }
 * triples). Near-identical to the Pitcairn table plus the 0x264c/0x2648
 * entries — shared helper not worth it for const data. */
535 static const u32 verde_mgcg_cgcg_init[] =
536 {
537         0xc400, 0xffffffff, 0xfffffffc,
538         0x802c, 0xffffffff, 0xe0000000,
539         0x9a60, 0xffffffff, 0x00000100,
540         0x92a4, 0xffffffff, 0x00000100,
541         0xc164, 0xffffffff, 0x00000100,
542         0x9774, 0xffffffff, 0x00000100,
543         0x8984, 0xffffffff, 0x06000100,
544         0x8a18, 0xffffffff, 0x00000100,
545         0x92a0, 0xffffffff, 0x00000100,
546         0xc380, 0xffffffff, 0x00000100,
547         0x8b28, 0xffffffff, 0x00000100,
548         0x9144, 0xffffffff, 0x00000100,
549         0x8d88, 0xffffffff, 0x00000100,
550         0x8d8c, 0xffffffff, 0x00000100,
551         0x9030, 0xffffffff, 0x00000100,
552         0x9034, 0xffffffff, 0x00000100,
553         0x9038, 0xffffffff, 0x00000100,
554         0x903c, 0xffffffff, 0x00000100,
555         0xad80, 0xffffffff, 0x00000100,
556         0xac54, 0xffffffff, 0x00000100,
557         0x897c, 0xffffffff, 0x06000100,
558         0x9868, 0xffffffff, 0x00000100,
559         0x9510, 0xffffffff, 0x00000100,
560         0xaf04, 0xffffffff, 0x00000100,
561         0xae04, 0xffffffff, 0x00000100,
562         0x949c, 0xffffffff, 0x00000100,
563         0x802c, 0xffffffff, 0xe0000000,
564         0x9160, 0xffffffff, 0x00010000,
565         0x9164, 0xffffffff, 0x00030002,
566         0x9168, 0xffffffff, 0x00040007,
567         0x916c, 0xffffffff, 0x00060005,
568         0x9170, 0xffffffff, 0x00090008,
569         0x9174, 0xffffffff, 0x00020001,
570         0x9178, 0xffffffff, 0x00040003,
571         0x917c, 0xffffffff, 0x00000007,
572         0x9180, 0xffffffff, 0x00060005,
573         0x9184, 0xffffffff, 0x00090008,
574         0x9188, 0xffffffff, 0x00030002,
575         0x918c, 0xffffffff, 0x00050004,
576         0x9190, 0xffffffff, 0x00000008,
577         0x9194, 0xffffffff, 0x00070006,
578         0x9198, 0xffffffff, 0x000a0009,
579         0x919c, 0xffffffff, 0x00040003,
580         0x91a0, 0xffffffff, 0x00060005,
581         0x91a4, 0xffffffff, 0x00000009,
582         0x91a8, 0xffffffff, 0x00080007,
583         0x91ac, 0xffffffff, 0x000b000a,
584         0x91b0, 0xffffffff, 0x00050004,
585         0x91b4, 0xffffffff, 0x00070006,
586         0x91b8, 0xffffffff, 0x0008000b,
587         0x91bc, 0xffffffff, 0x000a0009,
588         0x91c0, 0xffffffff, 0x000d000c,
589         0x9200, 0xffffffff, 0x00090008,
590         0x9204, 0xffffffff, 0x000b000a,
591         0x9208, 0xffffffff, 0x000c000f,
592         0x920c, 0xffffffff, 0x000e000d,
593         0x9210, 0xffffffff, 0x00110010,
594         0x9214, 0xffffffff, 0x000a0009,
595         0x9218, 0xffffffff, 0x000c000b,
596         0x921c, 0xffffffff, 0x0000000f,
597         0x9220, 0xffffffff, 0x000e000d,
598         0x9224, 0xffffffff, 0x00110010,
599         0x9228, 0xffffffff, 0x000b000a,
600         0x922c, 0xffffffff, 0x000d000c,
601         0x9230, 0xffffffff, 0x00000010,
602         0x9234, 0xffffffff, 0x000f000e,
603         0x9238, 0xffffffff, 0x00120011,
604         0x923c, 0xffffffff, 0x000c000b,
605         0x9240, 0xffffffff, 0x000e000d,
606         0x9244, 0xffffffff, 0x00000011,
607         0x9248, 0xffffffff, 0x0010000f,
608         0x924c, 0xffffffff, 0x00130012,
609         0x9250, 0xffffffff, 0x000d000c,
610         0x9254, 0xffffffff, 0x000f000e,
611         0x9258, 0xffffffff, 0x00100013,
612         0x925c, 0xffffffff, 0x00120011,
613         0x9260, 0xffffffff, 0x00150014,
614         0x9150, 0xffffffff, 0x96940200,
615         0x8708, 0xffffffff, 0x00900100,
616         0xc478, 0xffffffff, 0x00000080,
617         0xc404, 0xffffffff, 0x0020003f,
618         0x30, 0xffffffff, 0x0000001c,
619         0x34, 0x000f0000, 0x000f0000,
620         0x160c, 0xffffffff, 0x00000100,
621         0x1024, 0xffffffff, 0x00000100,
622         0x102c, 0x00000101, 0x00000000,
623         0x20a8, 0xffffffff, 0x00000104,
624         0x264c, 0x000c0000, 0x000c0000,
625         0x2648, 0x000c0000, 0x000c0000,
626         0x55e4, 0xff000fff, 0x00000100,
627         0x55e8, 0x00000001, 0x00000001,
628         0x2f50, 0x00000001, 0x00000001,
629         0x30cc, 0xc0000fff, 0x00000104,
630         0xc1e4, 0x00000001, 0x00000001,
631         0xd0c0, 0xfffffff0, 0x00000100,
632         0xd8c0, 0xfffffff0, 0x00000100
633 };
634
/* Oland MGCG/CGCG clock-gating init sequence ({ offset, mask, value }
 * triples); the 0x9160 run ends at 0x91d4 on this smaller ASIC. */
635 static const u32 oland_mgcg_cgcg_init[] =
636 {
637         0xc400, 0xffffffff, 0xfffffffc,
638         0x802c, 0xffffffff, 0xe0000000,
639         0x9a60, 0xffffffff, 0x00000100,
640         0x92a4, 0xffffffff, 0x00000100,
641         0xc164, 0xffffffff, 0x00000100,
642         0x9774, 0xffffffff, 0x00000100,
643         0x8984, 0xffffffff, 0x06000100,
644         0x8a18, 0xffffffff, 0x00000100,
645         0x92a0, 0xffffffff, 0x00000100,
646         0xc380, 0xffffffff, 0x00000100,
647         0x8b28, 0xffffffff, 0x00000100,
648         0x9144, 0xffffffff, 0x00000100,
649         0x8d88, 0xffffffff, 0x00000100,
650         0x8d8c, 0xffffffff, 0x00000100,
651         0x9030, 0xffffffff, 0x00000100,
652         0x9034, 0xffffffff, 0x00000100,
653         0x9038, 0xffffffff, 0x00000100,
654         0x903c, 0xffffffff, 0x00000100,
655         0xad80, 0xffffffff, 0x00000100,
656         0xac54, 0xffffffff, 0x00000100,
657         0x897c, 0xffffffff, 0x06000100,
658         0x9868, 0xffffffff, 0x00000100,
659         0x9510, 0xffffffff, 0x00000100,
660         0xaf04, 0xffffffff, 0x00000100,
661         0xae04, 0xffffffff, 0x00000100,
662         0x949c, 0xffffffff, 0x00000100,
663         0x802c, 0xffffffff, 0xe0000000,
664         0x9160, 0xffffffff, 0x00010000,
665         0x9164, 0xffffffff, 0x00030002,
666         0x9168, 0xffffffff, 0x00040007,
667         0x916c, 0xffffffff, 0x00060005,
668         0x9170, 0xffffffff, 0x00090008,
669         0x9174, 0xffffffff, 0x00020001,
670         0x9178, 0xffffffff, 0x00040003,
671         0x917c, 0xffffffff, 0x00000007,
672         0x9180, 0xffffffff, 0x00060005,
673         0x9184, 0xffffffff, 0x00090008,
674         0x9188, 0xffffffff, 0x00030002,
675         0x918c, 0xffffffff, 0x00050004,
676         0x9190, 0xffffffff, 0x00000008,
677         0x9194, 0xffffffff, 0x00070006,
678         0x9198, 0xffffffff, 0x000a0009,
679         0x919c, 0xffffffff, 0x00040003,
680         0x91a0, 0xffffffff, 0x00060005,
681         0x91a4, 0xffffffff, 0x00000009,
682         0x91a8, 0xffffffff, 0x00080007,
683         0x91ac, 0xffffffff, 0x000b000a,
684         0x91b0, 0xffffffff, 0x00050004,
685         0x91b4, 0xffffffff, 0x00070006,
686         0x91b8, 0xffffffff, 0x0008000b,
687         0x91bc, 0xffffffff, 0x000a0009,
688         0x91c0, 0xffffffff, 0x000d000c,
689         0x91c4, 0xffffffff, 0x00060005,
690         0x91c8, 0xffffffff, 0x00080007,
691         0x91cc, 0xffffffff, 0x0000000b,
692         0x91d0, 0xffffffff, 0x000a0009,
693         0x91d4, 0xffffffff, 0x000d000c,
694         0x9150, 0xffffffff, 0x96940200,
695         0x8708, 0xffffffff, 0x00900100,
696         0xc478, 0xffffffff, 0x00000080,
697         0xc404, 0xffffffff, 0x0020003f,
698         0x30, 0xffffffff, 0x0000001c,
699         0x34, 0x000f0000, 0x000f0000,
700         0x160c, 0xffffffff, 0x00000100,
701         0x1024, 0xffffffff, 0x00000100,
702         0x102c, 0x00000101, 0x00000000,
703         0x20a8, 0xffffffff, 0x00000104,
704         0x264c, 0x000c0000, 0x000c0000,
705         0x2648, 0x000c0000, 0x000c0000,
706         0x55e4, 0xff000fff, 0x00000100,
707         0x55e8, 0x00000001, 0x00000001,
708         0x2f50, 0x00000001, 0x00000001,
709         0x30cc, 0xc0000fff, 0x00000104,
710         0xc1e4, 0x00000001, 0x00000001,
711         0xd0c0, 0xfffffff0, 0x00000100,
712         0xd8c0, 0xfffffff0, 0x00000100
713 };
714
/* Hainan MGCG/CGCG clock-gating init sequence ({ offset, mask, value }
 * triples). NOTE(review): unlike the other ASIC tables this omits the
 * 0x102c and 0x55e4/0x55e8 entries — presumably intentional for this
 * display-less part; confirm against the hardware init docs. */
715 static const u32 hainan_mgcg_cgcg_init[] =
716 {
717         0xc400, 0xffffffff, 0xfffffffc,
718         0x802c, 0xffffffff, 0xe0000000,
719         0x9a60, 0xffffffff, 0x00000100,
720         0x92a4, 0xffffffff, 0x00000100,
721         0xc164, 0xffffffff, 0x00000100,
722         0x9774, 0xffffffff, 0x00000100,
723         0x8984, 0xffffffff, 0x06000100,
724         0x8a18, 0xffffffff, 0x00000100,
725         0x92a0, 0xffffffff, 0x00000100,
726         0xc380, 0xffffffff, 0x00000100,
727         0x8b28, 0xffffffff, 0x00000100,
728         0x9144, 0xffffffff, 0x00000100,
729         0x8d88, 0xffffffff, 0x00000100,
730         0x8d8c, 0xffffffff, 0x00000100,
731         0x9030, 0xffffffff, 0x00000100,
732         0x9034, 0xffffffff, 0x00000100,
733         0x9038, 0xffffffff, 0x00000100,
734         0x903c, 0xffffffff, 0x00000100,
735         0xad80, 0xffffffff, 0x00000100,
736         0xac54, 0xffffffff, 0x00000100,
737         0x897c, 0xffffffff, 0x06000100,
738         0x9868, 0xffffffff, 0x00000100,
739         0x9510, 0xffffffff, 0x00000100,
740         0xaf04, 0xffffffff, 0x00000100,
741         0xae04, 0xffffffff, 0x00000100,
742         0x949c, 0xffffffff, 0x00000100,
743         0x802c, 0xffffffff, 0xe0000000,
744         0x9160, 0xffffffff, 0x00010000,
745         0x9164, 0xffffffff, 0x00030002,
746         0x9168, 0xffffffff, 0x00040007,
747         0x916c, 0xffffffff, 0x00060005,
748         0x9170, 0xffffffff, 0x00090008,
749         0x9174, 0xffffffff, 0x00020001,
750         0x9178, 0xffffffff, 0x00040003,
751         0x917c, 0xffffffff, 0x00000007,
752         0x9180, 0xffffffff, 0x00060005,
753         0x9184, 0xffffffff, 0x00090008,
754         0x9188, 0xffffffff, 0x00030002,
755         0x918c, 0xffffffff, 0x00050004,
756         0x9190, 0xffffffff, 0x00000008,
757         0x9194, 0xffffffff, 0x00070006,
758         0x9198, 0xffffffff, 0x000a0009,
759         0x919c, 0xffffffff, 0x00040003,
760         0x91a0, 0xffffffff, 0x00060005,
761         0x91a4, 0xffffffff, 0x00000009,
762         0x91a8, 0xffffffff, 0x00080007,
763         0x91ac, 0xffffffff, 0x000b000a,
764         0x91b0, 0xffffffff, 0x00050004,
765         0x91b4, 0xffffffff, 0x00070006,
766         0x91b8, 0xffffffff, 0x0008000b,
767         0x91bc, 0xffffffff, 0x000a0009,
768         0x91c0, 0xffffffff, 0x000d000c,
769         0x91c4, 0xffffffff, 0x00060005,
770         0x91c8, 0xffffffff, 0x00080007,
771         0x91cc, 0xffffffff, 0x0000000b,
772         0x91d0, 0xffffffff, 0x000a0009,
773         0x91d4, 0xffffffff, 0x000d000c,
774         0x9150, 0xffffffff, 0x96940200,
775         0x8708, 0xffffffff, 0x00900100,
776         0xc478, 0xffffffff, 0x00000080,
777         0xc404, 0xffffffff, 0x0020003f,
778         0x30, 0xffffffff, 0x0000001c,
779         0x34, 0x000f0000, 0x000f0000,
780         0x160c, 0xffffffff, 0x00000100,
781         0x1024, 0xffffffff, 0x00000100,
782         0x20a8, 0xffffffff, 0x00000104,
783         0x264c, 0x000c0000, 0x000c0000,
784         0x2648, 0x000c0000, 0x000c0000,
785         0x2f50, 0x00000001, 0x00000001,
786         0x30cc, 0xc0000fff, 0x00000104,
787         0xc1e4, 0x00000001, 0x00000001,
788         0xd0c0, 0xfffffff0, 0x00000100,
789         0xd8c0, 0xfffffff0, 0x00000100
790 };
791
/* Verde power-gating init sequence: {offset, mask, value} triples consumed by
 * radeon_program_register_sequence() from si_init_golden_registers().
 * NOTE(review): values are hardware init data from AMD; do not hand-edit.
 */
static u32 verde_pg_init[] =
{
	0x353c, 0xffffffff, 0x40000,
	0x3538, 0xffffffff, 0x200010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x7007,
	0x3538, 0xffffffff, 0x300010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x400000,
	0x3538, 0xffffffff, 0x100010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x120200,
	0x3538, 0xffffffff, 0x500010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x1e1e16,
	0x3538, 0xffffffff, 0x600010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x171f1e,
	0x3538, 0xffffffff, 0x700010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x3538, 0xffffffff, 0x9ff,
	0x3500, 0xffffffff, 0x0,
	0x3504, 0xffffffff, 0x10000800,
	0x3504, 0xffffffff, 0xf,
	0x3504, 0xffffffff, 0xf,
	0x3500, 0xffffffff, 0x4,
	0x3504, 0xffffffff, 0x1000051e,
	0x3504, 0xffffffff, 0xffff,
	0x3504, 0xffffffff, 0xffff,
	0x3500, 0xffffffff, 0x8,
	0x3504, 0xffffffff, 0x80500,
	0x3500, 0xffffffff, 0x12,
	0x3504, 0xffffffff, 0x9050c,
	0x3500, 0xffffffff, 0x1d,
	0x3504, 0xffffffff, 0xb052c,
	0x3500, 0xffffffff, 0x2a,
	0x3504, 0xffffffff, 0x1053e,
	0x3500, 0xffffffff, 0x2d,
	0x3504, 0xffffffff, 0x10546,
	0x3500, 0xffffffff, 0x30,
	0x3504, 0xffffffff, 0xa054e,
	0x3500, 0xffffffff, 0x3c,
	0x3504, 0xffffffff, 0x1055f,
	0x3500, 0xffffffff, 0x3f,
	0x3504, 0xffffffff, 0x10567,
	0x3500, 0xffffffff, 0x42,
	0x3504, 0xffffffff, 0x1056f,
	0x3500, 0xffffffff, 0x45,
	0x3504, 0xffffffff, 0x10572,
	0x3500, 0xffffffff, 0x48,
	0x3504, 0xffffffff, 0x20575,
	0x3500, 0xffffffff, 0x4c,
	0x3504, 0xffffffff, 0x190801,
	0x3500, 0xffffffff, 0x67,
	0x3504, 0xffffffff, 0x1082a,
	0x3500, 0xffffffff, 0x6a,
	0x3504, 0xffffffff, 0x1b082d,
	0x3500, 0xffffffff, 0x87,
	0x3504, 0xffffffff, 0x310851,
	0x3500, 0xffffffff, 0xba,
	0x3504, 0xffffffff, 0x891,
	0x3500, 0xffffffff, 0xbc,
	0x3504, 0xffffffff, 0x893,
	0x3500, 0xffffffff, 0xbe,
	0x3504, 0xffffffff, 0x20895,
	0x3500, 0xffffffff, 0xc2,
	0x3504, 0xffffffff, 0x20899,
	0x3500, 0xffffffff, 0xc6,
	0x3504, 0xffffffff, 0x2089d,
	0x3500, 0xffffffff, 0xca,
	0x3504, 0xffffffff, 0x8a1,
	0x3500, 0xffffffff, 0xcc,
	0x3504, 0xffffffff, 0x8a3,
	0x3500, 0xffffffff, 0xce,
	0x3504, 0xffffffff, 0x308a5,
	0x3500, 0xffffffff, 0xd3,
	0x3504, 0xffffffff, 0x6d08cd,
	0x3500, 0xffffffff, 0x142,
	0x3504, 0xffffffff, 0x2000095a,
	0x3504, 0xffffffff, 0x1,
	0x3500, 0xffffffff, 0x144,
	0x3504, 0xffffffff, 0x301f095b,
	0x3500, 0xffffffff, 0x165,
	0x3504, 0xffffffff, 0xc094d,
	0x3500, 0xffffffff, 0x173,
	0x3504, 0xffffffff, 0xf096d,
	0x3500, 0xffffffff, 0x184,
	0x3504, 0xffffffff, 0x15097f,
	0x3500, 0xffffffff, 0x19b,
	0x3504, 0xffffffff, 0xc0998,
	0x3500, 0xffffffff, 0x1a9,
	0x3504, 0xffffffff, 0x409a7,
	0x3500, 0xffffffff, 0x1af,
	0x3504, 0xffffffff, 0xcdc,
	0x3500, 0xffffffff, 0x1b1,
	0x3504, 0xffffffff, 0x800,
	0x3508, 0xffffffff, 0x6c9b2000,
	0x3510, 0xfc00, 0x2000,
	0x3544, 0xffffffff, 0xfc0,
	0x28d4, 0x00000100, 0x100
};
918
919 static void si_init_golden_registers(struct radeon_device *rdev)
920 {
921         switch (rdev->family) {
922         case CHIP_TAHITI:
923                 radeon_program_register_sequence(rdev,
924                                                  tahiti_golden_registers,
925                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
926                 radeon_program_register_sequence(rdev,
927                                                  tahiti_golden_rlc_registers,
928                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
929                 radeon_program_register_sequence(rdev,
930                                                  tahiti_mgcg_cgcg_init,
931                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
932                 radeon_program_register_sequence(rdev,
933                                                  tahiti_golden_registers2,
934                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
935                 break;
936         case CHIP_PITCAIRN:
937                 radeon_program_register_sequence(rdev,
938                                                  pitcairn_golden_registers,
939                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
940                 radeon_program_register_sequence(rdev,
941                                                  pitcairn_golden_rlc_registers,
942                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
943                 radeon_program_register_sequence(rdev,
944                                                  pitcairn_mgcg_cgcg_init,
945                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
946                 break;
947         case CHIP_VERDE:
948                 radeon_program_register_sequence(rdev,
949                                                  verde_golden_registers,
950                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
951                 radeon_program_register_sequence(rdev,
952                                                  verde_golden_rlc_registers,
953                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
954                 radeon_program_register_sequence(rdev,
955                                                  verde_mgcg_cgcg_init,
956                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
957                 radeon_program_register_sequence(rdev,
958                                                  verde_pg_init,
959                                                  (const u32)ARRAY_SIZE(verde_pg_init));
960                 break;
961         case CHIP_OLAND:
962                 radeon_program_register_sequence(rdev,
963                                                  oland_golden_registers,
964                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
965                 radeon_program_register_sequence(rdev,
966                                                  oland_golden_rlc_registers,
967                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
968                 radeon_program_register_sequence(rdev,
969                                                  oland_mgcg_cgcg_init,
970                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
971                 break;
972         case CHIP_HAINAN:
973                 radeon_program_register_sequence(rdev,
974                                                  hainan_golden_registers,
975                                                  (const u32)ARRAY_SIZE(hainan_golden_registers));
976                 radeon_program_register_sequence(rdev,
977                                                  hainan_golden_registers2,
978                                                  (const u32)ARRAY_SIZE(hainan_golden_registers2));
979                 radeon_program_register_sequence(rdev,
980                                                  hainan_mgcg_cgcg_init,
981                                                  (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
982                 break;
983         default:
984                 break;
985         }
986 }
987
/* Reference clocks used by si_get_xclk(); units appear to be 10 kHz
 * (consistent with spll.reference_freq) — NOTE(review): confirm against
 * the callers of si_get_xclk().
 */
#define PCIE_BUS_CLK                10000
#define TCLK                        (PCIE_BUS_CLK / 10)
990
991 /**
992  * si_get_xclk - get the xclk
993  *
994  * @rdev: radeon_device pointer
995  *
996  * Returns the reference clock used by the gfx engine
997  * (SI).
998  */
999 u32 si_get_xclk(struct radeon_device *rdev)
1000 {
1001         u32 reference_clock = rdev->clock.spll.reference_freq;
1002         u32 tmp;
1003
1004         tmp = RREG32(CG_CLKPIN_CNTL_2);
1005         if (tmp & MUX_TCLK_TO_XCLK)
1006                 return TCLK;
1007
1008         tmp = RREG32(CG_CLKPIN_CNTL);
1009         if (tmp & XTALIN_DIVIDE)
1010                 return reference_clock / 4;
1011
1012         return reference_clock;
1013 }
1014
1015 /* get temperature in millidegrees */
1016 int si_get_temp(struct radeon_device *rdev)
1017 {
1018         u32 temp;
1019         int actual_temp = 0;
1020
1021         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1022                 CTF_TEMP_SHIFT;
1023
1024         if (temp & 0x200)
1025                 actual_temp = 255;
1026         else
1027                 actual_temp = temp & 0x1ff;
1028
1029         actual_temp = (actual_temp * 1000);
1030
1031         return actual_temp;
1032 }
1033
1034 #define TAHITI_IO_MC_REGS_SIZE 36
1035
/* Tahiti MC {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs written
 * by si_mc_load_microcode() before uploading the MC ucode.
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1074
/* Pitcairn MC {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs written
 * by si_mc_load_microcode(); identical to the Tahiti table except for the
 * final 0x9f entry.
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1113
/* Verde MC {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs written
 * by si_mc_load_microcode(); identical to the Tahiti table except for the
 * final 0x9f entry.
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1152
/* Oland MC {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs written
 * by si_mc_load_microcode(); identical to the Tahiti table except for the
 * final 0x9f entry.
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1191
/* Hainan MC {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs written
 * by si_mc_load_microcode(); identical to the Tahiti table except for the
 * final 0x9f entry.
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1230
1231 /* ucode loading */
1232 static int si_mc_load_microcode(struct radeon_device *rdev)
1233 {
1234         const __be32 *fw_data;
1235         u32 running, blackout = 0;
1236         u32 *io_mc_regs;
1237         int i, ucode_size, regs_size;
1238
1239         if (!rdev->mc_fw)
1240                 return -EINVAL;
1241
1242         switch (rdev->family) {
1243         case CHIP_TAHITI:
1244                 io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1245                 ucode_size = SI_MC_UCODE_SIZE;
1246                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1247                 break;
1248         case CHIP_PITCAIRN:
1249                 io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1250                 ucode_size = SI_MC_UCODE_SIZE;
1251                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1252                 break;
1253         case CHIP_VERDE:
1254         default:
1255                 io_mc_regs = (u32 *)&verde_io_mc_regs;
1256                 ucode_size = SI_MC_UCODE_SIZE;
1257                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1258                 break;
1259         case CHIP_OLAND:
1260                 io_mc_regs = (u32 *)&oland_io_mc_regs;
1261                 ucode_size = OLAND_MC_UCODE_SIZE;
1262                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1263                 break;
1264         case CHIP_HAINAN:
1265                 io_mc_regs = (u32 *)&hainan_io_mc_regs;
1266                 ucode_size = OLAND_MC_UCODE_SIZE;
1267                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1268                 break;
1269         }
1270
1271         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1272
1273         if (running == 0) {
1274                 if (running) {
1275                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1276                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1277                 }
1278
1279                 /* reset the engine and set to writable */
1280                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1281                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1282
1283                 /* load mc io regs */
1284                 for (i = 0; i < regs_size; i++) {
1285                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1286                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1287                 }
1288                 /* load the MC ucode */
1289                 fw_data = (const __be32 *)rdev->mc_fw->data;
1290                 for (i = 0; i < ucode_size; i++)
1291                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1292
1293                 /* put the engine back into the active state */
1294                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1295                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1296                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1297
1298                 /* wait for training to complete */
1299                 for (i = 0; i < rdev->usec_timeout; i++) {
1300                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1301                                 break;
1302                         udelay(1);
1303                 }
1304                 for (i = 0; i < rdev->usec_timeout; i++) {
1305                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1306                                 break;
1307                         udelay(1);
1308                 }
1309
1310                 if (running)
1311                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1312         }
1313
1314         return 0;
1315 }
1316
1317 static int si_init_microcode(struct radeon_device *rdev)
1318 {
1319         struct platform_device *pdev;
1320         const char *chip_name;
1321         const char *rlc_chip_name;
1322         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1323         char fw_name[30];
1324         int err;
1325
1326         DRM_DEBUG("\n");
1327
1328         pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
1329         err = IS_ERR(pdev);
1330         if (err) {
1331                 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
1332                 return -EINVAL;
1333         }
1334
1335         switch (rdev->family) {
1336         case CHIP_TAHITI:
1337                 chip_name = "TAHITI";
1338                 rlc_chip_name = "TAHITI";
1339                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1340                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1341                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1342                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1343                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1344                 break;
1345         case CHIP_PITCAIRN:
1346                 chip_name = "PITCAIRN";
1347                 rlc_chip_name = "PITCAIRN";
1348                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1349                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1350                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1351                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1352                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1353                 break;
1354         case CHIP_VERDE:
1355                 chip_name = "VERDE";
1356                 rlc_chip_name = "VERDE";
1357                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1358                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1359                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1360                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1361                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1362                 break;
1363         case CHIP_OLAND:
1364                 chip_name = "OLAND";
1365                 rlc_chip_name = "OLAND";
1366                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1367                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1368                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1369                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1370                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1371                 break;
1372         case CHIP_HAINAN:
1373                 chip_name = "HAINAN";
1374                 rlc_chip_name = "HAINAN";
1375                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1376                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1377                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1378                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1379                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1380                 break;
1381         default: BUG();
1382         }
1383
1384         DRM_INFO("Loading %s Microcode\n", chip_name);
1385
1386         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1387         err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
1388         if (err)
1389                 goto out;
1390         if (rdev->pfp_fw->size != pfp_req_size) {
1391                 printk(KERN_ERR
1392                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1393                        rdev->pfp_fw->size, fw_name);
1394                 err = -EINVAL;
1395                 goto out;
1396         }
1397
1398         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1399         err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
1400         if (err)
1401                 goto out;
1402         if (rdev->me_fw->size != me_req_size) {
1403                 printk(KERN_ERR
1404                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1405                        rdev->me_fw->size, fw_name);
1406                 err = -EINVAL;
1407         }
1408
1409         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1410         err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
1411         if (err)
1412                 goto out;
1413         if (rdev->ce_fw->size != ce_req_size) {
1414                 printk(KERN_ERR
1415                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1416                        rdev->ce_fw->size, fw_name);
1417                 err = -EINVAL;
1418         }
1419
1420         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1421         err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
1422         if (err)
1423                 goto out;
1424         if (rdev->rlc_fw->size != rlc_req_size) {
1425                 printk(KERN_ERR
1426                        "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1427                        rdev->rlc_fw->size, fw_name);
1428                 err = -EINVAL;
1429         }
1430
1431         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1432         err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
1433         if (err)
1434                 goto out;
1435         if (rdev->mc_fw->size != mc_req_size) {
1436                 printk(KERN_ERR
1437                        "si_mc: Bogus length %zu in firmware \"%s\"\n",
1438                        rdev->mc_fw->size, fw_name);
1439                 err = -EINVAL;
1440         }
1441
1442 out:
1443         platform_device_unregister(pdev);
1444
1445         if (err) {
1446                 if (err != -EINVAL)
1447                         printk(KERN_ERR
1448                                "si_cp: Failed to load firmware \"%s\"\n",
1449                                fw_name);
1450                 release_firmware(rdev->pfp_fw);
1451                 rdev->pfp_fw = NULL;
1452                 release_firmware(rdev->me_fw);
1453                 rdev->me_fw = NULL;
1454                 release_firmware(rdev->ce_fw);
1455                 rdev->ce_fw = NULL;
1456                 release_firmware(rdev->rlc_fw);
1457                 rdev->rlc_fw = NULL;
1458                 release_firmware(rdev->mc_fw);
1459                 rdev->mc_fw = NULL;
1460         }
1461         return err;
1462 }
1463
1464 /* watermark setup */
1465 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1466                                    struct radeon_crtc *radeon_crtc,
1467                                    struct drm_display_mode *mode,
1468                                    struct drm_display_mode *other_mode)
1469 {
1470         u32 tmp;
1471         /*
1472          * Line Buffer Setup
1473          * There are 3 line buffers, each one shared by 2 display controllers.
1474          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1475          * the display controllers.  The paritioning is done via one of four
1476          * preset allocations specified in bits 21:20:
1477          *  0 - half lb
1478          *  2 - whole lb, other crtc must be disabled
1479          */
1480         /* this can get tricky if we have two large displays on a paired group
1481          * of crtcs.  Ideally for multiple large displays we'd assign them to
1482          * non-linked crtcs for maximum line buffer allocation.
1483          */
1484         if (radeon_crtc->base.enabled && mode) {
1485                 if (other_mode)
1486                         tmp = 0; /* 1/2 */
1487                 else
1488                         tmp = 2; /* whole */
1489         } else
1490                 tmp = 0;
1491
1492         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1493                DC_LB_MEMORY_CONFIG(tmp));
1494
1495         if (radeon_crtc->base.enabled && mode) {
1496                 switch (tmp) {
1497                 case 0:
1498                 default:
1499                         return 4096 * 2;
1500                 case 2:
1501                         return 8192 * 2;
1502                 }
1503         }
1504
1505         /* controller not enabled, so no lb used */
1506         return 0;
1507 }
1508
1509 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1510 {
1511         u32 tmp = RREG32(MC_SHARED_CHMAP);
1512
1513         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1514         case 0:
1515         default:
1516                 return 1;
1517         case 1:
1518                 return 2;
1519         case 2:
1520                 return 4;
1521         case 3:
1522                 return 8;
1523         case 4:
1524                 return 3;
1525         case 5:
1526                 return 6;
1527         case 6:
1528                 return 10;
1529         case 7:
1530                 return 12;
1531         case 8:
1532                 return 16;
1533         }
1534 }
1535
/* Inputs for the DCE6 display watermark calculations below
 * (dram/display bandwidth helpers take a pointer to this struct).
 */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1551
1552 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1553 {
1554         /* Calculate raw DRAM Bandwidth */
1555         fixed20_12 dram_efficiency; /* 0.7 */
1556         fixed20_12 yclk, dram_channels, bandwidth;
1557         fixed20_12 a;
1558
1559         a.full = dfixed_const(1000);
1560         yclk.full = dfixed_const(wm->yclk);
1561         yclk.full = dfixed_div(yclk, a);
1562         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1563         a.full = dfixed_const(10);
1564         dram_efficiency.full = dfixed_const(7);
1565         dram_efficiency.full = dfixed_div(dram_efficiency, a);
1566         bandwidth.full = dfixed_mul(dram_channels, yclk);
1567         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1568
1569         return dfixed_trunc(bandwidth);
1570 }
1571
1572 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1573 {
1574         /* Calculate DRAM Bandwidth and the part allocated to display. */
1575         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1576         fixed20_12 yclk, dram_channels, bandwidth;
1577         fixed20_12 a;
1578
1579         a.full = dfixed_const(1000);
1580         yclk.full = dfixed_const(wm->yclk);
1581         yclk.full = dfixed_div(yclk, a);
1582         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1583         a.full = dfixed_const(10);
1584         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1585         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1586         bandwidth.full = dfixed_mul(dram_channels, yclk);
1587         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1588
1589         return dfixed_trunc(bandwidth);
1590 }
1591
1592 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1593 {
1594         /* Calculate the display Data return Bandwidth */
1595         fixed20_12 return_efficiency; /* 0.8 */
1596         fixed20_12 sclk, bandwidth;
1597         fixed20_12 a;
1598
1599         a.full = dfixed_const(1000);
1600         sclk.full = dfixed_const(wm->sclk);
1601         sclk.full = dfixed_div(sclk, a);
1602         a.full = dfixed_const(10);
1603         return_efficiency.full = dfixed_const(8);
1604         return_efficiency.full = dfixed_div(return_efficiency, a);
1605         a.full = dfixed_const(32);
1606         bandwidth.full = dfixed_mul(a, sclk);
1607         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1608
1609         return dfixed_trunc(bandwidth);
1610 }
1611
/* Size in bytes of a single DMIF request; fixed at 32 on DCE6. */
static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
{
        return 32;
}
1616
1617 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1618 {
1619         /* Calculate the DMIF Request Bandwidth */
1620         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1621         fixed20_12 disp_clk, sclk, bandwidth;
1622         fixed20_12 a, b1, b2;
1623         u32 min_bandwidth;
1624
1625         a.full = dfixed_const(1000);
1626         disp_clk.full = dfixed_const(wm->disp_clk);
1627         disp_clk.full = dfixed_div(disp_clk, a);
1628         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1629         b1.full = dfixed_mul(a, disp_clk);
1630
1631         a.full = dfixed_const(1000);
1632         sclk.full = dfixed_const(wm->sclk);
1633         sclk.full = dfixed_div(sclk, a);
1634         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1635         b2.full = dfixed_mul(a, sclk);
1636
1637         a.full = dfixed_const(10);
1638         disp_clk_request_efficiency.full = dfixed_const(8);
1639         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1640
1641         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1642
1643         a.full = dfixed_const(min_bandwidth);
1644         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1645
1646         return dfixed_trunc(bandwidth);
1647 }
1648
1649 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1650 {
1651         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1652         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1653         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1654         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1655
1656         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1657 }
1658
1659 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1660 {
1661         /* Calculate the display mode Average Bandwidth
1662          * DisplayMode should contain the source and destination dimensions,
1663          * timing, etc.
1664          */
1665         fixed20_12 bpp;
1666         fixed20_12 line_time;
1667         fixed20_12 src_width;
1668         fixed20_12 bandwidth;
1669         fixed20_12 a;
1670
1671         a.full = dfixed_const(1000);
1672         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1673         line_time.full = dfixed_div(line_time, a);
1674         bpp.full = dfixed_const(wm->bytes_per_pixel);
1675         src_width.full = dfixed_const(wm->src_width);
1676         bandwidth.full = dfixed_mul(src_width, bpp);
1677         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1678         bandwidth.full = dfixed_div(bandwidth, line_time);
1679
1680         return dfixed_trunc(bandwidth);
1681 }
1682
/* Compute the latency watermark (in ns) for the given watermark
 * parameters: the worst-case wait for data from memory, padded by any
 * extra time needed when a line cannot be refilled within the active
 * display period.  Returns 0 when no heads are active.
 */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
        /* First calculate the latency in ns */
        u32 mc_latency = 2000; /* 2000 ns. */
        u32 available_bandwidth = dce6_available_bandwidth(wm);
        u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
        u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
        u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
        /* time lost to the other heads competing for the same memory path */
        u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
                (wm->num_heads * cursor_line_pair_return_time);
        u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
        u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
        u32 tmp, dmif_size = 12288;
        fixed20_12 a, b, c;

        if (wm->num_heads == 0)
                return 0;

        /* heavy downscaling, many scaler taps or interlacing require up
         * to 4 source lines per destination line; otherwise 2 suffice */
        a.full = dfixed_const(2);
        b.full = dfixed_const(1);
        if ((wm->vsc.full > a.full) ||
            ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
            (wm->vtaps >= 5) ||
            ((wm->vsc.full >= a.full) && wm->interlaced))
                max_src_lines_per_dst_line = 4;
        else
                max_src_lines_per_dst_line = 2;

        /* a = this head's per-head share of the available bandwidth */
        a.full = dfixed_const(available_bandwidth);
        b.full = dfixed_const(wm->num_heads);
        a.full = dfixed_div(a, b);

        /* b = rate at which the dmif buffer can be drained */
        b.full = dfixed_const(mc_latency + 512);
        c.full = dfixed_const(wm->disp_clk);
        b.full = dfixed_div(b, c);

        c.full = dfixed_const(dmif_size);
        b.full = dfixed_div(c, b);

        tmp = min(dfixed_trunc(a), dfixed_trunc(b));

        /* b = bytes consumed per unit time by the display */
        b.full = dfixed_const(1000);
        c.full = dfixed_const(wm->disp_clk);
        b.full = dfixed_div(c, b);
        c.full = dfixed_const(wm->bytes_per_pixel);
        b.full = dfixed_mul(b, c);

        /* line buffer fill rate: the lesser of supply and demand */
        lb_fill_bw = min(tmp, dfixed_trunc(b));

        /* time to fill one destination line's worth of source data */
        a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
        b.full = dfixed_const(1000);
        c.full = dfixed_const(lb_fill_bw);
        b.full = dfixed_div(c, b);
        a.full = dfixed_div(a, b);
        line_fill_time = dfixed_trunc(a);

        /* pad the latency when the line cannot be refilled in time */
        if (line_fill_time < wm->active_time)
                return latency;
        else
                return latency + (line_fill_time - wm->active_time);

}
1745
1746 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1747 {
1748         if (dce6_average_bandwidth(wm) <=
1749             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
1750                 return true;
1751         else
1752                 return false;
1753 };
1754
1755 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
1756 {
1757         if (dce6_average_bandwidth(wm) <=
1758             (dce6_available_bandwidth(wm) / wm->num_heads))
1759                 return true;
1760         else
1761                 return false;
1762 };
1763
1764 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
1765 {
1766         u32 lb_partitions = wm->lb_size / wm->src_width;
1767         u32 line_time = wm->active_time + wm->blank_time;
1768         u32 latency_tolerant_lines;
1769         u32 latency_hiding;
1770         fixed20_12 a;
1771
1772         a.full = dfixed_const(1);
1773         if (wm->vsc.full > a.full)
1774                 latency_tolerant_lines = 1;
1775         else {
1776                 if (lb_partitions <= (wm->vtaps + 1))
1777                         latency_tolerant_lines = 1;
1778                 else
1779                         latency_tolerant_lines = 2;
1780         }
1781
1782         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
1783
1784         if (dce6_latency_watermark(wm) <= latency_hiding)
1785                 return true;
1786         else
1787                 return false;
1788 }
1789
/**
 * dce6_program_watermarks - program display watermarks for a crtc
 * @rdev: radeon_device pointer
 * @radeon_crtc: the crtc to program
 * @lb_size: line buffer size allocated to this crtc's pipe
 * @num_heads: number of active display heads
 *
 * Computes latency watermarks for the high (A) and low (B) clock
 * levels, writes them into the crtc's DPG latency registers, and
 * programs the arbitration priority marks.  If the mode's bandwidth
 * demands cannot be met, display priority is forced to high.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
                                         struct radeon_crtc *radeon_crtc,
                                         u32 lb_size, u32 num_heads)
{
        struct drm_display_mode *mode = &radeon_crtc->base.mode;
        struct dce6_wm_params wm_low, wm_high;
        u32 dram_channels;
        u32 pixel_period;
        u32 line_time = 0;
        u32 latency_watermark_a = 0, latency_watermark_b = 0;
        u32 priority_a_mark = 0, priority_b_mark = 0;
        u32 priority_a_cnt = PRIORITY_OFF;
        u32 priority_b_cnt = PRIORITY_OFF;
        u32 tmp, arb_control3;
        fixed20_12 a, b, c;

        if (radeon_crtc->base.enabled && num_heads && mode) {
                /* pixel clock is in kHz, so pixel_period is in ns */
                pixel_period = 1000000 / (u32)mode->clock;
                /* line_time register field is limited to 16 bits */
                line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
                priority_a_cnt = 0;
                priority_b_cnt = 0;

                /* ARUBA uses the DCE5/evergreen memory controller layout */
                if (rdev->family == CHIP_ARUBA)
                        dram_channels = evergreen_get_number_of_dram_channels(rdev);
                else
                        dram_channels = si_get_number_of_dram_channels(rdev);

                /* watermark for high clocks */
                if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
                        wm_high.yclk =
                                radeon_dpm_get_mclk(rdev, false) * 10;
                        wm_high.sclk =
                                radeon_dpm_get_sclk(rdev, false) * 10;
                } else {
                        wm_high.yclk = rdev->pm.current_mclk * 10;
                        wm_high.sclk = rdev->pm.current_sclk * 10;
                }

                wm_high.disp_clk = mode->clock;
                wm_high.src_width = mode->crtc_hdisplay;
                wm_high.active_time = mode->crtc_hdisplay * pixel_period;
                wm_high.blank_time = line_time - wm_high.active_time;
                wm_high.interlaced = false;
                if (mode->flags & DRM_MODE_FLAG_INTERLACE)
                        wm_high.interlaced = true;
                wm_high.vsc = radeon_crtc->vsc;
                wm_high.vtaps = 1;
                if (radeon_crtc->rmx_type != RMX_OFF)
                        wm_high.vtaps = 2;
                wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
                wm_high.lb_size = lb_size;
                wm_high.dram_channels = dram_channels;
                wm_high.num_heads = num_heads;

                /* watermark for low clocks */
                if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
                        wm_low.yclk =
                                radeon_dpm_get_mclk(rdev, true) * 10;
                        wm_low.sclk =
                                radeon_dpm_get_sclk(rdev, true) * 10;
                } else {
                        wm_low.yclk = rdev->pm.current_mclk * 10;
                        wm_low.sclk = rdev->pm.current_sclk * 10;
                }

                wm_low.disp_clk = mode->clock;
                wm_low.src_width = mode->crtc_hdisplay;
                wm_low.active_time = mode->crtc_hdisplay * pixel_period;
                wm_low.blank_time = line_time - wm_low.active_time;
                wm_low.interlaced = false;
                if (mode->flags & DRM_MODE_FLAG_INTERLACE)
                        wm_low.interlaced = true;
                wm_low.vsc = radeon_crtc->vsc;
                wm_low.vtaps = 1;
                if (radeon_crtc->rmx_type != RMX_OFF)
                        wm_low.vtaps = 2;
                wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
                wm_low.lb_size = lb_size;
                wm_low.dram_channels = dram_channels;
                wm_low.num_heads = num_heads;

                /* set for high clocks */
                latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
                /* set for low clocks */
                latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

                /* possibly force display priority to high */
                /* should really do this at mode validation time... */
                if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
                    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
                    !dce6_check_latency_hiding(&wm_high) ||
                    (rdev->disp_priority == 2)) {
                        DRM_DEBUG_KMS("force priority to high\n");
                        priority_a_cnt |= PRIORITY_ALWAYS_ON;
                        priority_b_cnt |= PRIORITY_ALWAYS_ON;
                }
                if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
                    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
                    !dce6_check_latency_hiding(&wm_low) ||
                    (rdev->disp_priority == 2)) {
                        DRM_DEBUG_KMS("force priority to high\n");
                        priority_a_cnt |= PRIORITY_ALWAYS_ON;
                        priority_b_cnt |= PRIORITY_ALWAYS_ON;
                }

                /* priority mark A: latency_watermark_a scaled by pixel
                 * clock and horizontal scale ratio, in units of 16 */
                a.full = dfixed_const(1000);
                b.full = dfixed_const(mode->clock);
                b.full = dfixed_div(b, a);
                c.full = dfixed_const(latency_watermark_a);
                c.full = dfixed_mul(c, b);
                c.full = dfixed_mul(c, radeon_crtc->hsc);
                c.full = dfixed_div(c, a);
                a.full = dfixed_const(16);
                c.full = dfixed_div(c, a);
                priority_a_mark = dfixed_trunc(c);
                priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

                /* priority mark B: same computation with the low-clock
                 * watermark */
                a.full = dfixed_const(1000);
                b.full = dfixed_const(mode->clock);
                b.full = dfixed_div(b, a);
                c.full = dfixed_const(latency_watermark_b);
                c.full = dfixed_mul(c, b);
                c.full = dfixed_mul(c, radeon_crtc->hsc);
                c.full = dfixed_div(c, a);
                a.full = dfixed_const(16);
                c.full = dfixed_div(c, a);
                priority_b_mark = dfixed_trunc(c);
                priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
        }

        /* select wm A */
        arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
        tmp = arb_control3;
        tmp &= ~LATENCY_WATERMARK_MASK(3);
        tmp |= LATENCY_WATERMARK_MASK(1);
        WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
        WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
               (LATENCY_LOW_WATERMARK(latency_watermark_a) |
                LATENCY_HIGH_WATERMARK(line_time)));
        /* select wm B */
        tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
        tmp &= ~LATENCY_WATERMARK_MASK(3);
        tmp |= LATENCY_WATERMARK_MASK(2);
        WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
        WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
               (LATENCY_LOW_WATERMARK(latency_watermark_b) |
                LATENCY_HIGH_WATERMARK(line_time)));
        /* restore original selection */
        WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

        /* write the priority marks */
        WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
        WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

}
1945
1946 void dce6_bandwidth_update(struct radeon_device *rdev)
1947 {
1948         struct drm_display_mode *mode0 = NULL;
1949         struct drm_display_mode *mode1 = NULL;
1950         u32 num_heads = 0, lb_size;
1951         int i;
1952
1953         radeon_update_display_priority(rdev);
1954
1955         for (i = 0; i < rdev->num_crtc; i++) {
1956                 if (rdev->mode_info.crtcs[i]->base.enabled)
1957                         num_heads++;
1958         }
1959         for (i = 0; i < rdev->num_crtc; i += 2) {
1960                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
1961                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
1962                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
1963                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
1964                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
1965                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
1966         }
1967 }
1968
1969 /*
1970  * Core functions
1971  */
1972 static void si_tiling_mode_table_init(struct radeon_device *rdev)
1973 {
1974         const u32 num_tile_mode_states = 32;
1975         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1976
1977         switch (rdev->config.si.mem_row_size_in_kb) {
1978         case 1:
1979                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1980                 break;
1981         case 2:
1982         default:
1983                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1984                 break;
1985         case 4:
1986                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1987                 break;
1988         }
1989
1990         if ((rdev->family == CHIP_TAHITI) ||
1991             (rdev->family == CHIP_PITCAIRN)) {
1992                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1993                         switch (reg_offset) {
1994                         case 0:  /* non-AA compressed depth or any compressed stencil */
1995                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1996                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1997                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1998                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1999                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2000                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2001                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2002                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2003                                 break;
2004                         case 1:  /* 2xAA/4xAA compressed depth only */
2005                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2006                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2007                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2008                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2009                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2010                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2011                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2012                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2013                                 break;
2014                         case 2:  /* 8xAA compressed depth only */
2015                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2016                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2017                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2018                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2019                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2020                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2021                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2022                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2023                                 break;
2024                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2025                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2026                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2027                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2028                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2029                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2030                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2031                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2032                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2033                                 break;
2034                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2035                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2036                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2037                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2038                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2039                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2040                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2041                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2042                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2043                                 break;
2044                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2045                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2046                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2047                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2048                                                  TILE_SPLIT(split_equal_to_row_size) |
2049                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2050                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2051                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2052                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2053                                 break;
2054                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2055                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2056                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2057                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2058                                                  TILE_SPLIT(split_equal_to_row_size) |
2059                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2060                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2061                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2062                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2063                                 break;
2064                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2065                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2066                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2067                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2068                                                  TILE_SPLIT(split_equal_to_row_size) |
2069                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2070                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2071                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2072                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2073                                 break;
2074                         case 8:  /* 1D and 1D Array Surfaces */
2075                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2076                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2077                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2078                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2079                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2080                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2081                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2082                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2083                                 break;
2084                         case 9:  /* Displayable maps. */
2085                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2086                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2087                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2088                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2089                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2090                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2091                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2092                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2093                                 break;
2094                         case 10:  /* Display 8bpp. */
2095                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2096                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2097                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2098                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2099                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2100                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2101                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2102                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2103                                 break;
2104                         case 11:  /* Display 16bpp. */
2105                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2106                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2107                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2108                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2109                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2110                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2111                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2112                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2113                                 break;
2114                         case 12:  /* Display 32bpp. */
2115                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2116                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2117                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2118                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2119                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2120                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2121                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2122                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2123                                 break;
2124                         case 13:  /* Thin. */
2125                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2126                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2127                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2128                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2129                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2130                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2131                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2132                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2133                                 break;
2134                         case 14:  /* Thin 8 bpp. */
2135                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2136                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2137                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2138                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2139                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2140                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2141                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2142                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2143                                 break;
2144                         case 15:  /* Thin 16 bpp. */
2145                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2146                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2147                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2148                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2149                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2150                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2151                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2152                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2153                                 break;
2154                         case 16:  /* Thin 32 bpp. */
2155                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2156                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2157                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2158                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2159                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2160                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2161                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2162                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2163                                 break;
2164                         case 17:  /* Thin 64 bpp. */
2165                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2166                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2167                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2168                                                  TILE_SPLIT(split_equal_to_row_size) |
2169                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2170                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2171                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2172                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2173                                 break;
2174                         case 21:  /* 8 bpp PRT. */
2175                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2176                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2177                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2178                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2179                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2180                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2181                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2182                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2183                                 break;
2184                         case 22:  /* 16 bpp PRT */
2185                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2186                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2187                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2188                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2189                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2190                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2191                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2192                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2193                                 break;
2194                         case 23:  /* 32 bpp PRT */
2195                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2196                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2197                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2198                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2199                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2200                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2201                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2202                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2203                                 break;
2204                         case 24:  /* 64 bpp PRT */
2205                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2206                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2207                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2208                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2209                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2210                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2211                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2212                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2213                                 break;
2214                         case 25:  /* 128 bpp PRT */
2215                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2216                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2217                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2218                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2219                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2220                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2221                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2222                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2223                                 break;
2224                         default:
2225                                 gb_tile_moden = 0;
2226                                 break;
2227                         }
2228                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2229                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2230                 }
2231         } else if ((rdev->family == CHIP_VERDE) ||
2232                    (rdev->family == CHIP_OLAND) ||
2233                    (rdev->family == CHIP_HAINAN)) {
2234                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2235                         switch (reg_offset) {
2236                         case 0:  /* non-AA compressed depth or any compressed stencil */
2237                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2238                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2239                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2240                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2241                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2242                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2243                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2244                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2245                                 break;
2246                         case 1:  /* 2xAA/4xAA compressed depth only */
2247                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2248                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2249                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2250                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2251                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2252                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2253                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2254                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2255                                 break;
2256                         case 2:  /* 8xAA compressed depth only */
2257                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2258                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2259                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2260                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2261                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2262                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2263                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2264                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2265                                 break;
2266                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2267                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2268                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2269                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2270                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2271                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2272                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2273                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2274                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2275                                 break;
2276                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2277                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2278                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2279                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2280                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2281                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2282                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2283                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2284                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2285                                 break;
2286                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2287                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2288                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2289                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2290                                                  TILE_SPLIT(split_equal_to_row_size) |
2291                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2292                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2293                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2294                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2295                                 break;
2296                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2297                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2298                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2299                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2300                                                  TILE_SPLIT(split_equal_to_row_size) |
2301                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2302                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2303                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2304                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2305                                 break;
2306                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2307                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2308                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2309                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2310                                                  TILE_SPLIT(split_equal_to_row_size) |
2311                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2312                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2313                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2314                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2315                                 break;
2316                         case 8:  /* 1D and 1D Array Surfaces */
2317                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2318                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2319                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2320                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2321                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2322                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2323                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2324                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2325                                 break;
2326                         case 9:  /* Displayable maps. */
2327                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2328                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2329                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2330                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2331                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2332                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2333                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2334                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2335                                 break;
2336                         case 10:  /* Display 8bpp. */
2337                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2338                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2339                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2340                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2341                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2342                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2343                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2344                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2345                                 break;
2346                         case 11:  /* Display 16bpp. */
2347                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2348                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2349                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2350                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2351                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2352                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2353                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2354                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2355                                 break;
2356                         case 12:  /* Display 32bpp. */
2357                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2358                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2359                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2360                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2361                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2362                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2363                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2364                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2365                                 break;
2366                         case 13:  /* Thin. */
2367                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2368                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2369                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2370                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2371                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2372                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2373                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2374                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2375                                 break;
2376                         case 14:  /* Thin 8 bpp. */
2377                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2378                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2379                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2380                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2381                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2382                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2383                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2384                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2385                                 break;
2386                         case 15:  /* Thin 16 bpp. */
2387                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2388                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2389                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2390                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2391                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2392                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2393                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2394                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2395                                 break;
2396                         case 16:  /* Thin 32 bpp. */
2397                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2398                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2399                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2400                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2401                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2402                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2403                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2404                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2405                                 break;
2406                         case 17:  /* Thin 64 bpp. */
2407                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2408                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2409                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2410                                                  TILE_SPLIT(split_equal_to_row_size) |
2411                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2412                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2413                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2414                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2415                                 break;
2416                         case 21:  /* 8 bpp PRT. */
2417                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2418                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2419                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2420                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2421                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2422                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2423                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2424                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2425                                 break;
2426                         case 22:  /* 16 bpp PRT */
2427                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2428                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2429                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2430                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2431                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2432                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2433                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2434                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2435                                 break;
2436                         case 23:  /* 32 bpp PRT */
2437                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2438                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2439                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2440                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2441                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2442                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2443                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2444                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2445                                 break;
2446                         case 24:  /* 64 bpp PRT */
2447                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2448                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2449                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2450                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2451                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2452                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2454                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2455                                 break;
2456                         case 25:  /* 128 bpp PRT */
2457                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2458                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2459                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2460                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2461                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2462                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2464                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2465                                 break;
2466                         default:
2467                                 gb_tile_moden = 0;
2468                                 break;
2469                         }
2470                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2471                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2472                 }
2473         } else
2474                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2475 }
2476
2477 static void si_select_se_sh(struct radeon_device *rdev,
2478                             u32 se_num, u32 sh_num)
2479 {
2480         u32 data = INSTANCE_BROADCAST_WRITES;
2481
2482         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2483                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2484         else if (se_num == 0xffffffff)
2485                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2486         else if (sh_num == 0xffffffff)
2487                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2488         else
2489                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2490         WREG32(GRBM_GFX_INDEX, data);
2491 }
2492
2493 static u32 si_create_bitmask(u32 bit_width)
2494 {
2495         u32 i, mask = 0;
2496
2497         for (i = 0; i < bit_width; i++) {
2498                 mask <<= 1;
2499                 mask |= 1;
2500         }
2501         return mask;
2502 }
2503
2504 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2505 {
2506         u32 data, mask;
2507
2508         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2509         if (data & 1)
2510                 data &= INACTIVE_CUS_MASK;
2511         else
2512                 data = 0;
2513         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2514
2515         data >>= INACTIVE_CUS_SHIFT;
2516
2517         mask = si_create_bitmask(cu_per_sh);
2518
2519         return ~data & mask;
2520 }
2521
2522 static void si_setup_spi(struct radeon_device *rdev,
2523                          u32 se_num, u32 sh_per_se,
2524                          u32 cu_per_sh)
2525 {
2526         int i, j, k;
2527         u32 data, mask, active_cu;
2528
2529         for (i = 0; i < se_num; i++) {
2530                 for (j = 0; j < sh_per_se; j++) {
2531                         si_select_se_sh(rdev, i, j);
2532                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2533                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2534
2535                         mask = 1;
2536                         for (k = 0; k < 16; k++) {
2537                                 mask <<= k;
2538                                 if (active_cu & mask) {
2539                                         data &= ~mask;
2540                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2541                                         break;
2542                                 }
2543                         }
2544                 }
2545         }
2546         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2547 }
2548
/*
 * si_get_rb_disabled - query the disabled render backends for the
 * currently selected SE/SH
 *
 * Merges the fuse (CC_RB_BACKEND_DISABLE) and user
 * (GC_USER_RB_BACKEND_DISABLE) backend-disable fields and masks the
 * result down to the per-shader-array RB count
 * (max_rb_num / se_num / sh_per_se).  A set bit means the RB is disabled.
 */
static u32 si_get_rb_disabled(struct radeon_device *rdev,
			      u32 max_rb_num, u32 se_num,
			      u32 sh_per_se)
{
	u32 data, mask;

	/* NOTE(review): as in si_get_cu_enabled(), bit 0 appears to gate
	 * the validity of the fuse register — confirm. */
	data = RREG32(CC_RB_BACKEND_DISABLE);
	if (data & 1)
		data &= BACKEND_DISABLE_MASK;
	else
		data = 0;
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

	mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);

	return data & mask;
}
2568
/*
 * si_setup_rb - program the rasterizer's render backend mapping
 *
 * Collects the disabled-RB mask across all SE/SH pairs (each SH
 * contributing TAHITI_RB_BITMAP_WIDTH_PER_SH bits), derives the enabled
 * mask, and programs PA_SC_RASTER_CONFIG per shader engine accordingly.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* gather per-SE/SH disabled RBs into one packed bitmask */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* enabled = complement of disabled, over max_rb_num bits */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* translate each 2-bit enabled pattern into a RB_MAP field.
	 * NOTE(review): the case 2 -> RB_MAP_3 / case 3 -> RB_MAP_2 pairing
	 * looks asymmetric — verify against the raster config spec. */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2616
/*
 * si_gpu_init - one-time gfx block initialization
 *
 * Fills in rdev->config.si with the per-asic shader/pipe/backend
 * topology, derives GB_ADDR_CONFIG and the packed tiling-config dword
 * from the MC RAM configuration, programs the address-config registers,
 * the tiling mode table, the RB/SPI setup and the 3D-engine defaults.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-family topology limits and golden address config */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read but never used below —
	 * confirm whether the read is still needed. */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* derive the DRAM row size (in KB, capped at 4) from NOOFCOLS */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* the same address config is mirrored to every client block */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);


	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* read-modify-write with no bits changed (register settle/latch) */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* disable CB perf counters */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* let the programming above settle */
	udelay(50);
}
2873
2874 /*
2875  * GPU scratch registers helpers function.
2876  */
2877 static void si_scratch_init(struct radeon_device *rdev)
2878 {
2879         int i;
2880
2881         rdev->scratch.num_reg = 7;
2882         rdev->scratch.reg_base = SCRATCH_REG0;
2883         for (i = 0; i < rdev->scratch.num_reg; i++) {
2884                 rdev->scratch.free[i] = true;
2885                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2886         }
2887 }
2888
/*
 * si_fence_ring_emit - emit a fence on one of the CP rings
 *
 * Emits a read-cache flush over GART (CP_COHER_CNTL2 write plus a
 * SURFACE_SYNC packet), then an EVENT_WRITE_EOP packet that writes
 * @fence->seq to the fence driver GPU address for this ring and signals
 * an interrupt.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff);
	/* address high bits plus data/interrupt select fields */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
2915
2916 /*
2917  * IB stuff
2918  */
/*
 * si_ring_ib_execute - schedule an indirect buffer on a CP ring
 *
 * Const IBs are preceded by a SWITCH_BUFFER packet and dispatched with
 * INDIRECT_BUFFER_CONST.  Normal IBs first record the expected rptr
 * after this submission (via the scratch rptr_save_reg when available,
 * otherwise via a WRITE_DATA to the writeback slot), and are followed
 * by a GART read-cache flush for the IB's vmid.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords (this reg write) + 4 (IB packet) + 8 (flush) */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords (WRITE_DATA) + 4 (IB packet) + 8 (flush) */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			/* NOTE(review): (1 << 8) is the WRITE_DATA destination
			 * select field — confirm encoding against the PM4 spec */
			radeon_ring_write(ring, (1 << 8));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	/* size in dwords plus the vmid in bits 31:24 */
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
2975
2976 /*
2977  * CP.
2978  */
2979 static void si_cp_enable(struct radeon_device *rdev, bool enable)
2980 {
2981         if (enable)
2982                 WREG32(CP_ME_CNTL, 0);
2983         else {
2984                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2985                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2986                 WREG32(SCRATCH_UMSK, 0);
2987                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2988                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
2989                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2990         }
2991         udelay(50);
2992 }
2993
2994 static int si_cp_load_microcode(struct radeon_device *rdev)
2995 {
2996         const __be32 *fw_data;
2997         int i;
2998
2999         if (!rdev->me_fw || !rdev->pfp_fw)
3000                 return -EINVAL;
3001
3002         si_cp_enable(rdev, false);
3003
3004         /* PFP */
3005         fw_data = (const __be32 *)rdev->pfp_fw->data;
3006         WREG32(CP_PFP_UCODE_ADDR, 0);
3007         for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3008                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3009         WREG32(CP_PFP_UCODE_ADDR, 0);
3010
3011         /* CE */
3012         fw_data = (const __be32 *)rdev->ce_fw->data;
3013         WREG32(CP_CE_UCODE_ADDR, 0);
3014         for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3015                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3016         WREG32(CP_CE_UCODE_ADDR, 0);
3017
3018         /* ME */
3019         fw_data = (const __be32 *)rdev->me_fw->data;
3020         WREG32(CP_ME_RAM_WADDR, 0);
3021         for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3022                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3023         WREG32(CP_ME_RAM_WADDR, 0);
3024
3025         WREG32(CP_PFP_UCODE_ADDR, 0);
3026         WREG32(CP_CE_UCODE_ADDR, 0);
3027         WREG32(CP_ME_RAM_WADDR, 0);
3028         WREG32(CP_ME_RAM_RADDR, 0);
3029         return 0;
3030 }
3031
3032 static int si_cp_start(struct radeon_device *rdev)
3033 {
3034         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3035         int r, i;
3036
3037         r = radeon_ring_lock(rdev, ring, 7 + 4);
3038         if (r) {
3039                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3040                 return r;
3041         }
3042         /* init the CP */
3043         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3044         radeon_ring_write(ring, 0x1);
3045         radeon_ring_write(ring, 0x0);
3046         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3047         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3048         radeon_ring_write(ring, 0);
3049         radeon_ring_write(ring, 0);
3050
3051         /* init the CE partitions */
3052         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3053         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3054         radeon_ring_write(ring, 0xc000);
3055         radeon_ring_write(ring, 0xe000);
3056         radeon_ring_unlock_commit(rdev, ring);
3057
3058         si_cp_enable(rdev, true);
3059
3060         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3061         if (r) {
3062                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3063                 return r;
3064         }
3065
3066         /* setup clear context state */
3067         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3068         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3069
3070         for (i = 0; i < si_default_size; i++)
3071                 radeon_ring_write(ring, si_default_state[i]);
3072
3073         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3074         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3075
3076         /* set clear context state */
3077         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3078         radeon_ring_write(ring, 0);
3079
3080         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3081         radeon_ring_write(ring, 0x00000316);
3082         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3083         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3084
3085         radeon_ring_unlock_commit(rdev, ring);
3086
3087         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3088                 ring = &rdev->ring[i];
3089                 r = radeon_ring_lock(rdev, ring, 2);
3090
3091                 /* clear the compute context state */
3092                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3093                 radeon_ring_write(ring, 0);
3094
3095                 radeon_ring_unlock_commit(rdev, ring);
3096         }
3097
3098         return 0;
3099 }
3100
3101 static void si_cp_fini(struct radeon_device *rdev)
3102 {
3103         struct radeon_ring *ring;
3104         si_cp_enable(rdev, false);
3105
3106         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3107         radeon_ring_fini(rdev, ring);
3108         radeon_scratch_free(rdev, ring->rptr_save_reg);
3109
3110         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3111         radeon_ring_fini(rdev, ring);
3112         radeon_scratch_free(rdev, ring->rptr_save_reg);
3113
3114         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3115         radeon_ring_fini(rdev, ring);
3116         radeon_scratch_free(rdev, ring->rptr_save_reg);
3117 }
3118
/*
 * si_cp_resume - bring up the three CP ring buffers
 *
 * Soft-resets the CP-related gfx blocks, programs the ring buffer size,
 * read/write pointers and writeback addresses for the gfx ring and both
 * compute rings, then calls si_cp_start() and ring-tests each ring.
 *
 * Returns 0 if at least the gfx ring passes its test, otherwise the
 * gfx ring test error; compute ring test failures only mark those
 * rings not ready.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
				 SOFT_RESET_PA |
				 SOFT_RESET_VGT |
				 SOFT_RESET_SPI |
				 SOFT_RESET_SX));
	RREG32(GRBM_SOFT_RESET);
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register writeback: only configured here for ring 0 */
	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB1_RPTR);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB2_RPTR);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	/* a gfx ring failure is fatal ... */
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	/* ... a compute ring failure only disables that ring */
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	return 0;
}
3254
/*
 * si_gpu_check_soft_reset - determine which blocks need a soft reset
 *
 * Reads the GRBM, SRBM, DMA and VM L2 status registers and translates
 * busy/pending bits into a RADEON_RESET_* mask.  An MC reset request is
 * deliberately dropped at the end since a busy MC is most likely not
 * hung.
 */
static u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3335
/**
 * si_gpu_soft_reset - soft reset the blocks selected by reset_mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: RADEON_RESET_* bitmask (from si_gpu_check_soft_reset())
 *
 * Halts the CP, disables the selected DMA ring buffers, stops the MC,
 * then pulses the per-block GRBM/SRBM soft reset bits and restores the
 * MC state.  A no-op when @reset_mask is 0.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset_mask into per-block GRBM/SRBM reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* pulse the GRBM reset bits: set, delay, clear (read-back after
	 * each write posts the write before the delay) */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same pulse sequence for the SRBM-side blocks */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3460
/**
 * si_asic_reset - soft reset the GPU
 *
 * @rdev: radeon_device pointer
 *
 * Determines which blocks are hung, marks the engine hung via the BIOS
 * scratch registers, performs the soft reset and re-checks; the hung
 * flag is cleared only if the second check comes back clean.
 * Always returns 0.
 */
int si_asic_reset(struct radeon_device *rdev)
{
	u32 reset_mask;

	reset_mask = si_gpu_check_soft_reset(rdev);

	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	si_gpu_soft_reset(rdev, reset_mask);

	reset_mask = si_gpu_check_soft_reset(rdev);

	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
3479
3480 /**
3481  * si_gfx_is_lockup - Check if the GFX engine is locked up
3482  *
3483  * @rdev: radeon_device pointer
3484  * @ring: radeon_ring structure holding ring information
3485  *
3486  * Check if the GFX engine is locked up.
3487  * Returns true if the engine appears to be locked up, false if not.
3488  */
3489 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3490 {
3491         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3492
3493         if (!(reset_mask & (RADEON_RESET_GFX |
3494                             RADEON_RESET_COMPUTE |
3495                             RADEON_RESET_CP))) {
3496                 radeon_ring_lockup_update(ring);
3497                 return false;
3498         }
3499         /* force CP activities */
3500         radeon_ring_force_activity(rdev, ring);
3501         return radeon_ring_test_lockup(rdev, ring);
3502 }
3503
3504 /**
3505  * si_dma_is_lockup - Check if the DMA engine is locked up
3506  *
3507  * @rdev: radeon_device pointer
3508  * @ring: radeon_ring structure holding ring information
3509  *
3510  * Check if the async DMA engine is locked up.
3511  * Returns true if the engine appears to be locked up, false if not.
3512  */
3513 bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3514 {
3515         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3516         u32 mask;
3517
3518         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
3519                 mask = RADEON_RESET_DMA;
3520         else
3521                 mask = RADEON_RESET_DMA1;
3522
3523         if (!(reset_mask & mask)) {
3524                 radeon_ring_lockup_update(ring);
3525                 return false;
3526         }
3527         /* force ring activities */
3528         radeon_ring_force_activity(rdev, ring);
3529         return radeon_ring_test_lockup(rdev, ring);
3530 }
3531
3532 /* MC */
/**
 * si_mc_program - program the memory controller aperture layout
 *
 * @rdev: radeon_device pointer
 *
 * Clears the HDP surface registers, stops the MC, programs the system
 * aperture and FB location from rdev->mc, then resumes the MC and (on
 * parts with display) disables VGA access to VRAM.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	/* NOTE(review): 0x2c14..0x2c24 look like the 32 per-surface HDP
	 * register sets (stride 0x18) - confirm against the register spec.
	 */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: top 16 bits = end >> 24, low 16 bits = start >> 24 */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
3583
/**
 * si_vram_gtt_location - place VRAM and GTT in the GPU address space
 *
 * @rdev: radeon_device pointer
 * @mc: memory controller structure holding the VRAM/GTT parameters
 *
 * Caps usable VRAM at 0xFFC0000000 so at least 1024M of address space
 * remains for the GTT, then computes the VRAM and GTT base locations.
 */
void si_vram_gtt_location(struct radeon_device *rdev,
			  struct radeon_mc *mc)
{
	if (mc->mc_vram_size > 0xFFC0000000ULL) {
		/* leave room for at least 1024M GTT */
		dev_warn(rdev->dev, "limiting VRAM\n");
		mc->real_vram_size = 0xFFC0000000ULL;
		mc->mc_vram_size = 0xFFC0000000ULL;
	}
	/* NOTE(review): VRAM placement uses &rdev->mc while GTT placement
	 * uses the @mc argument; callers appear expected to pass &rdev->mc
	 * (see si_mc_init) - confirm before passing anything else.
	 */
	radeon_vram_location(rdev, &rdev->mc, 0);
	rdev->mc.gtt_base_align = 0;
	radeon_gtt_location(rdev, mc);
}
3597
3598 static int si_mc_init(struct radeon_device *rdev)
3599 {
3600         u32 tmp;
3601         int chansize, numchan;
3602
3603         /* Get VRAM informations */
3604         rdev->mc.vram_is_ddr = true;
3605         tmp = RREG32(MC_ARB_RAMCFG);
3606         if (tmp & CHANSIZE_OVERRIDE) {
3607                 chansize = 16;
3608         } else if (tmp & CHANSIZE_MASK) {
3609                 chansize = 64;
3610         } else {
3611                 chansize = 32;
3612         }
3613         tmp = RREG32(MC_SHARED_CHMAP);
3614         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3615         case 0:
3616         default:
3617                 numchan = 1;
3618                 break;
3619         case 1:
3620                 numchan = 2;
3621                 break;
3622         case 2:
3623                 numchan = 4;
3624                 break;
3625         case 3:
3626                 numchan = 8;
3627                 break;
3628         case 4:
3629                 numchan = 3;
3630                 break;
3631         case 5:
3632                 numchan = 6;
3633                 break;
3634         case 6:
3635                 numchan = 10;
3636                 break;
3637         case 7:
3638                 numchan = 12;
3639                 break;
3640         case 8:
3641                 numchan = 16;
3642                 break;
3643         }
3644         rdev->mc.vram_width = numchan * chansize;
3645         /* Could aper size report 0 ? */
3646         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3647         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3648         /* size in MB on si */
3649         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3650         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3651         rdev->mc.visible_vram_size = rdev->mc.aper_size;
3652         si_vram_gtt_location(rdev, &rdev->mc);
3653         radeon_update_bandwidth_info(rdev);
3654
3655         return 0;
3656 }
3657
3658 /*
3659  * GART
3660  */
/**
 * si_pcie_gart_tlb_flush - flush the VM TLBs
 *
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP cache and then requests a TLB invalidate for
 * VM context 0 (the GART context).
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
3669
/**
 * si_pcie_gart_enable - set up and enable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, programs the L1 TLB and L2 cache,
 * sets up VM context 0 as the GART mapping and contexts 1-15 for
 * per-process VMs, then flushes the TLBs.  Returns 0 on success or a
 * negative error code if there is no page table object or pinning fails.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	/* NOTE(review): the meaning of (0xA << 7) is not visible here -
	 * presumably a TLB sizing/behavior field; confirm against the
	 * MC_VM_MX_L1_TLB_CNTL register spec.
	 */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): 0x15D4/0x15D8/0x15DC are undocumented here -
	 * confirm what they clear against the register spec.
	 */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
3754
/**
 * si_pcie_gart_disable - disable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables all VM contexts, reprograms the TLB/L2 controls with the
 * enable bits cleared and unpins the page table.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
3773
/**
 * si_pcie_gart_fini - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART, then frees the page table and GART bookkeeping.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
3780
3781 /* vm parser */
3782 static bool si_vm_reg_valid(u32 reg)
3783 {
3784         /* context regs are fine */
3785         if (reg >= 0x28000)
3786                 return true;
3787
3788         /* check config regs */
3789         switch (reg) {
3790         case GRBM_GFX_INDEX:
3791         case CP_STRMOUT_CNTL:
3792         case VGT_VTX_VECT_EJECT_REG:
3793         case VGT_CACHE_INVALIDATION:
3794         case VGT_ESGS_RING_SIZE:
3795         case VGT_GSVS_RING_SIZE:
3796         case VGT_GS_VERTEX_REUSE:
3797         case VGT_PRIMITIVE_TYPE:
3798         case VGT_INDEX_TYPE:
3799         case VGT_NUM_INDICES:
3800         case VGT_NUM_INSTANCES:
3801         case VGT_TF_RING_SIZE:
3802         case VGT_HS_OFFCHIP_PARAM:
3803         case VGT_TF_MEMORY_BASE:
3804         case PA_CL_ENHANCE:
3805         case PA_SU_LINE_STIPPLE_VALUE:
3806         case PA_SC_LINE_STIPPLE_STATE:
3807         case PA_SC_ENHANCE:
3808         case SQC_CACHES:
3809         case SPI_STATIC_THREAD_MGMT_1:
3810         case SPI_STATIC_THREAD_MGMT_2:
3811         case SPI_STATIC_THREAD_MGMT_3:
3812         case SPI_PS_MAX_WAVE_ID:
3813         case SPI_CONFIG_CNTL:
3814         case SPI_CONFIG_CNTL_1:
3815         case TA_CNTL_AUX:
3816                 return true;
3817         default:
3818                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
3819                 return false;
3820         }
3821 }
3822
3823 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
3824                                   u32 *ib, struct radeon_cs_packet *pkt)
3825 {
3826         switch (pkt->opcode) {
3827         case PACKET3_NOP:
3828         case PACKET3_SET_BASE:
3829         case PACKET3_SET_CE_DE_COUNTERS:
3830         case PACKET3_LOAD_CONST_RAM:
3831         case PACKET3_WRITE_CONST_RAM:
3832         case PACKET3_WRITE_CONST_RAM_OFFSET:
3833         case PACKET3_DUMP_CONST_RAM:
3834         case PACKET3_INCREMENT_CE_COUNTER:
3835         case PACKET3_WAIT_ON_DE_COUNTER:
3836         case PACKET3_CE_WRITE:
3837                 break;
3838         default:
3839                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
3840                 return -EINVAL;
3841         }
3842         return 0;
3843 }
3844
/**
 * si_vm_packet3_gfx_check - validate a packet3 for the GFX ring
 *
 * @rdev: radeon_device pointer
 * @ib: IB dword buffer
 * @pkt: decoded packet header
 *
 * Checks the opcode against the GFX whitelist; for packets that can
 * write registers (COPY_DATA, WRITE_DATA, COND_WRITE, COPY_DW,
 * SET_CONFIG_REG, CP_DMA) it additionally validates every target
 * register with si_vm_reg_valid().  Returns 0 if the packet is allowed,
 * -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;
	u32 command, info;

	switch (pkt->opcode) {
	/* plain draw/state/sync packets that take no register targets */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dst sel 0 targets a register; validate it */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dst sel 0 targets register space */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* single-register (one_reg_wr style) mode */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* NOTE(review): if pkt->count < 2 this
				 * subtraction wraps (count is unsigned) -
				 * confirm callers guarantee count >= 2.
				 */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* write-space bit set -> destination is a register */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* dst is a register when bit 1 is set */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* range-check the whole config reg span, then each reg */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		command = ib[idx + 4];
		info = ib[idx + 1];
		if (command & PACKET3_CP_DMA_CMD_SAS) {
			/* src address space is register */
			if (((info & 0x60000000) >> 29) == 0) {
				start_reg = idx_value << 2;
				if (command & PACKET3_CP_DMA_CMD_SAIC) {
					/* src addr increment disabled:
					 * only one register is read */
					reg = start_reg;
					if (!si_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad SRC register\n");
						return -EINVAL;
					}
				} else {
					/* low 21 bits of command = byte count */
					for (i = 0; i < (command & 0x1fffff); i++) {
						reg = start_reg + (4 * i);
						if (!si_vm_reg_valid(reg)) {
							DRM_ERROR("CP DMA Bad SRC register\n");
							return -EINVAL;
						}
					}
				}
			}
		}
		if (command & PACKET3_CP_DMA_CMD_DAS) {
			/* dst address space is register */
			if (((info & 0x00300000) >> 20) == 0) {
				start_reg = ib[idx + 2];
				if (command & PACKET3_CP_DMA_CMD_DAIC) {
					/* dst addr increment disabled:
					 * only one register is written */
					reg = start_reg;
					if (!si_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad DST register\n");
						return -EINVAL;
					}
				} else {
					for (i = 0; i < (command & 0x1fffff); i++) {
						reg = start_reg + (4 * i);
						if (!si_vm_reg_valid(reg)) {
							DRM_ERROR("CP DMA Bad DST register\n");
							return -EINVAL;
						}
					}
				}
			}
		}
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4003
/**
 * si_vm_packet3_compute_check - validate a packet3 for a compute ring
 *
 * @rdev: radeon_device pointer
 * @ib: IB dword buffer
 * @pkt: decoded packet header
 *
 * Compute-ring counterpart of si_vm_packet3_gfx_check(): a smaller
 * opcode whitelist (no draw packets), with the same per-register
 * validation for COPY_DATA, WRITE_DATA, COND_WRITE and COPY_DW.
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	/* packets that take no register targets */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dst sel 0 targets a register; validate it */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dst sel 0 targets register space */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* NOTE(review): if pkt->count < 2 this
				 * subtraction wraps (count is unsigned) -
				 * confirm callers guarantee count >= 2.
				 */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* write-space bit set -> destination is a register */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* dst is a register when bit 1 is set */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4085
4086 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4087 {
4088         int ret = 0;
4089         u32 idx = 0;
4090         struct radeon_cs_packet pkt;
4091
4092         do {
4093                 pkt.idx = idx;
4094                 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4095                 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4096                 pkt.one_reg_wr = 0;
4097                 switch (pkt.type) {
4098                 case RADEON_PACKET_TYPE0:
4099                         dev_err(rdev->dev, "Packet0 not allowed!\n");
4100                         ret = -EINVAL;
4101                         break;
4102                 case RADEON_PACKET_TYPE2:
4103                         idx += 1;
4104                         break;
4105                 case RADEON_PACKET_TYPE3:
4106                         pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4107                         if (ib->is_const_ib)
4108                                 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4109                         else {
4110                                 switch (ib->ring) {
4111                                 case RADEON_RING_TYPE_GFX_INDEX:
4112                                         ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4113                                         break;
4114                                 case CAYMAN_RING_TYPE_CP1_INDEX:
4115                                 case CAYMAN_RING_TYPE_CP2_INDEX:
4116                                         ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4117                                         break;
4118                                 default:
4119                                         dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4120                                         ret = -EINVAL;
4121                                         break;
4122                                 }
4123                         }
4124                         idx += pkt.count + 2;
4125                         break;
4126                 default:
4127                         dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4128                         ret = -EINVAL;
4129                         break;
4130                 }
4131                 if (ret)
4132                         break;
4133         } while (idx < ib->length_dw);
4134
4135         return ret;
4136 }
4137
4138 /*
4139  * vm
4140  */
/**
 * si_vm_init - set up the VM manager parameters for SI
 *
 * @rdev: radeon_device pointer
 *
 * SI exposes 16 hardware VM contexts.  Always returns 0.
 */
int si_vm_init(struct radeon_device *rdev)
{
	/* number of VMs */
	rdev->vm_manager.nvm = 16;
	/* base offset of vram pages */
	rdev->vm_manager.vram_base_offset = 0;

	return 0;
}
4150
/* si_vm_fini - VM manager teardown; nothing to free on SI. */
void si_vm_fini(struct radeon_device *rdev)
{
}
4154
4155 /**
4156  * si_vm_set_page - update the page tables using the CP
4157  *
4158  * @rdev: radeon_device pointer
4159  * @ib: indirect buffer to fill with commands
4160  * @pe: addr of the page entry
4161  * @addr: dst addr to write into pe
4162  * @count: number of page entries to update
4163  * @incr: increase next addr by incr bytes
4164  * @flags: access flags
4165  *
4166  * Update the page tables using the CP (SI).
4167  */
4168 void si_vm_set_page(struct radeon_device *rdev,
4169                     struct radeon_ib *ib,
4170                     uint64_t pe,
4171                     uint64_t addr, unsigned count,
4172                     uint32_t incr, uint32_t flags)
4173 {
4174         uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4175         uint64_t value;
4176         unsigned ndw;
4177
4178         if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4179                 while (count) {
4180                         ndw = 2 + count * 2;
4181                         if (ndw > 0x3FFE)
4182                                 ndw = 0x3FFE;
4183
4184                         ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4185                         ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4186                                         WRITE_DATA_DST_SEL(1));
4187                         ib->ptr[ib->length_dw++] = pe;
4188                         ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4189                         for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4190                                 if (flags & RADEON_VM_PAGE_SYSTEM) {
4191                                         value = radeon_vm_map_gart(rdev, addr);
4192                                         value &= 0xFFFFFFFFFFFFF000ULL;
4193                                 } else if (flags & RADEON_VM_PAGE_VALID) {
4194                                         value = addr;
4195                                 } else {
4196                                         value = 0;
4197                                 }
4198                                 addr += incr;
4199                                 value |= r600_flags;
4200                                 ib->ptr[ib->length_dw++] = value;
4201                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4202                         }
4203                 }
4204         } else {
4205                 /* DMA */
4206                 if (flags & RADEON_VM_PAGE_SYSTEM) {
4207                         while (count) {
4208                                 ndw = count * 2;
4209                                 if (ndw > 0xFFFFE)
4210                                         ndw = 0xFFFFE;
4211
4212                                 /* for non-physically contiguous pages (system) */
4213                                 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
4214                                 ib->ptr[ib->length_dw++] = pe;
4215                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
4216                                 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4217                                         if (flags & RADEON_VM_PAGE_SYSTEM) {
4218                                                 value = radeon_vm_map_gart(rdev, addr);
4219                                                 value &= 0xFFFFFFFFFFFFF000ULL;
4220                                         } else if (flags & RADEON_VM_PAGE_VALID) {
4221                                                 value = addr;
4222                                         } else {
4223                                                 value = 0;
4224                                         }
4225                                         addr += incr;
4226                                         value |= r600_flags;
4227                                         ib->ptr[ib->length_dw++] = value;
4228                                         ib->ptr[ib->length_dw++] = upper_32_bits(value);
4229                                 }
4230                         }
4231                 } else {
4232                         while (count) {
4233                                 ndw = count * 2;
4234                                 if (ndw > 0xFFFFE)
4235                                         ndw = 0xFFFFE;
4236
4237                                 if (flags & RADEON_VM_PAGE_VALID)
4238                                         value = addr;
4239                                 else
4240                                         value = 0;
4241                                 /* for physically contiguous pages (vram) */
4242                                 ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
4243                                 ib->ptr[ib->length_dw++] = pe; /* dst addr */
4244                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
4245                                 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4246                                 ib->ptr[ib->length_dw++] = 0;
4247                                 ib->ptr[ib->length_dw++] = value; /* value */
4248                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4249                                 ib->ptr[ib->length_dw++] = incr; /* increment size */
4250                                 ib->ptr[ib->length_dw++] = 0;
4251                                 pe += ndw * 4;
4252                                 addr += (ndw / 2) * incr;
4253                                 count -= ndw / 2;
4254                         }
4255                 }
4256                 while (ib->length_dw & 0x7)
4257                         ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
4258         }
4259 }
4260
/**
 * si_vm_flush - flush the TLB for a VM via a CP ring
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the ring to emit the flush on
 * @vm: vm to flush (no-op when NULL)
 *
 * Writes the VM's page directory base address, flushes the HDP cache
 * and requests a TLB invalidate for the VM's context id, then syncs
 * the PFP to the ME.
 */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	/* base address is stored as a page frame number */
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
4303
/**
 * si_dma_vm_flush - flush the TLB for a VM via a DMA ring
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the DMA ring to emit the flush on
 * @vm: vm to flush (no-op when NULL)
 *
 * Same sequence as si_vm_flush() but expressed as SRBM register writes
 * through the DMA engine.
 */
void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
	} else {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
	}
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-15 are the VM contexts0-15 (SI has 16 contexts, see si_vm_init) */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm->id);
}
4329
4330 /*
4331  * RLC
4332  */
4333 void si_rlc_fini(struct radeon_device *rdev)
4334 {
4335         int r;
4336
4337         /* save restore block */
4338         if (rdev->rlc.save_restore_obj) {
4339                 r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
4340                 if (unlikely(r != 0))
4341                         dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
4342                 radeon_bo_unpin(rdev->rlc.save_restore_obj);
4343                 radeon_bo_unreserve(rdev->rlc.save_restore_obj);
4344
4345                 radeon_bo_unref(&rdev->rlc.save_restore_obj);
4346                 rdev->rlc.save_restore_obj = NULL;
4347         }
4348
4349         /* clear state block */
4350         if (rdev->rlc.clear_state_obj) {
4351                 r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
4352                 if (unlikely(r != 0))
4353                         dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
4354                 radeon_bo_unpin(rdev->rlc.clear_state_obj);
4355                 radeon_bo_unreserve(rdev->rlc.clear_state_obj);
4356
4357                 radeon_bo_unref(&rdev->rlc.clear_state_obj);
4358                 rdev->rlc.clear_state_obj = NULL;
4359         }
4360 }
4361
4362 int si_rlc_init(struct radeon_device *rdev)
4363 {
4364         int r;
4365
4366         /* save restore block */
4367         if (rdev->rlc.save_restore_obj == NULL) {
4368                 r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
4369                                      RADEON_GEM_DOMAIN_VRAM, NULL,
4370                                      &rdev->rlc.save_restore_obj);
4371                 if (r) {
4372                         dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
4373                         return r;
4374                 }
4375         }
4376
4377         r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
4378         if (unlikely(r != 0)) {
4379                 si_rlc_fini(rdev);
4380                 return r;
4381         }
4382         r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
4383                           &rdev->rlc.save_restore_gpu_addr);
4384         radeon_bo_unreserve(rdev->rlc.save_restore_obj);
4385         if (r) {
4386                 dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
4387                 si_rlc_fini(rdev);
4388                 return r;
4389         }
4390
4391         /* clear state block */
4392         if (rdev->rlc.clear_state_obj == NULL) {
4393                 r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
4394                                      RADEON_GEM_DOMAIN_VRAM, NULL,
4395                                      &rdev->rlc.clear_state_obj);
4396                 if (r) {
4397                         dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
4398                         si_rlc_fini(rdev);
4399                         return r;
4400                 }
4401         }
4402         r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
4403         if (unlikely(r != 0)) {
4404                 si_rlc_fini(rdev);
4405                 return r;
4406         }
4407         r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
4408                           &rdev->rlc.clear_state_gpu_addr);
4409         radeon_bo_unreserve(rdev->rlc.clear_state_obj);
4410         if (r) {
4411                 dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
4412                 si_rlc_fini(rdev);
4413                 return r;
4414         }
4415
4416         return 0;
4417 }
4418
/* Disable the RLC block (clears RLC_CNTL). */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);
}
4423
/* Enable the RLC block. */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);
}
4428
/**
 * si_rlc_resume - reset the RLC, load its ucode and restart it
 *
 * @rdev: radeon_device pointer
 *
 * Requires rdev->rlc_fw to have been fetched already; returns -EINVAL
 * otherwise.  The save/restore and clear state BOs must already be
 * pinned (see si_rlc_init()).  Returns 0 on success.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);

	/* addresses are stored as 256-byte aligned frame numbers (>> 8) */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* upload the ucode; firmware image is big-endian dwords */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
		WREG32(RLC_UCODE_ADDR, i);
		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_rlc_start(rdev);

	return 0;
}
4462
4463 static void si_enable_interrupts(struct radeon_device *rdev)
4464 {
4465         u32 ih_cntl = RREG32(IH_CNTL);
4466         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4467
4468         ih_cntl |= ENABLE_INTR;
4469         ih_rb_cntl |= IH_RB_ENABLE;
4470         WREG32(IH_CNTL, ih_cntl);
4471         WREG32(IH_RB_CNTL, ih_rb_cntl);
4472         rdev->ih.enabled = true;
4473 }
4474
4475 static void si_disable_interrupts(struct radeon_device *rdev)
4476 {
4477         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4478         u32 ih_cntl = RREG32(IH_CNTL);
4479
4480         ih_rb_cntl &= ~IH_RB_ENABLE;
4481         ih_cntl &= ~ENABLE_INTR;
4482         WREG32(IH_RB_CNTL, ih_rb_cntl);
4483         WREG32(IH_CNTL, ih_cntl);
4484         /* set rptr, wptr to 0 */
4485         WREG32(IH_RB_RPTR, 0);
4486         WREG32(IH_RB_WPTR, 0);
4487         rdev->ih.enabled = false;
4488         rdev->ih.rptr = 0;
4489 }
4490
/**
 * si_disable_interrupt_state - mask every interrupt source
 *
 * @rdev: radeon_device pointer
 *
 * Clears the CP ring, DMA, GRBM, CRTC vblank, pageflip and (when the
 * ASIC has display hardware) hotplug interrupt enables, preserving
 * only the HPD polarity bits.
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	/* mask the trap interrupts on both DMA engines */
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	/* vblank interrupts off, per populated crtc pair */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* pageflip interrupts off */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DACA_AUTODETECT_INT_CONTROL, 0);

		/* keep only the HPD polarity bit; clears the enable bit */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
4546
/**
 * si_irq_init - bring up the interrupt handler
 *
 * @rdev: radeon_device pointer
 *
 * Allocates the IH ring buffer, loads the RLC ucode (si_rlc_resume()),
 * programs the IH ring registers, masks all interrupt sources and
 * finally enables the IH block.  Returns 0 for success, errors for
 * failure.
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size is programmed as log2 of the size in dwords */
	rb_bufsz = drm_order(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
4617
/**
 * si_irq_set - program the interrupt enables from the current irq state
 *
 * @rdev: radeon_device pointer
 *
 * Builds enable masks for the CP rings, DMA engines, CRTC vblanks and
 * hotplug pins from rdev->irq, then writes them all to the hardware in
 * one pass.  Returns 0 on success, -EINVAL when no irq handler is
 * installed.
 */
int si_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
	u32 grbm_int_cntl = 0;
	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
	u32 dma_cntl, dma_cntl1;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* start from the current HPD state with the enable bits cleared */
	if (!ASIC_IS_NODCE(rdev)) {
		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
	}

	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}
	/* vblank interrupts: needed for either the vblank API or a pending pageflip */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("si_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("si_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("si_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("si_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("si_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("si_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("si_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("si_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("si_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("si_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("si_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("si_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	/* commit everything to the hardware */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DC_HPD1_INT_CONTROL, hpd1);
		WREG32(DC_HPD2_INT_CONTROL, hpd2);
		WREG32(DC_HPD3_INT_CONTROL, hpd3);
		WREG32(DC_HPD4_INT_CONTROL, hpd4);
		WREG32(DC_HPD5_INT_CONTROL, hpd5);
		WREG32(DC_HPD6_INT_CONTROL, hpd6);
	}

	return 0;
}
4775
4776 static inline void si_irq_ack(struct radeon_device *rdev)
4777 {
4778         u32 tmp;
4779
4780         if (ASIC_IS_NODCE(rdev))
4781                 return;
4782
4783         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
4784         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
4785         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
4786         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
4787         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
4788         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
4789         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
4790         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
4791         if (rdev->num_crtc >= 4) {
4792                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
4793                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
4794         }
4795         if (rdev->num_crtc >= 6) {
4796                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
4797                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
4798         }
4799
4800         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
4801                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4802         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
4803                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4804         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
4805                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
4806         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
4807                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
4808         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
4809                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
4810         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
4811                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
4812
4813         if (rdev->num_crtc >= 4) {
4814                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
4815                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4816                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
4817                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4818                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
4819                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
4820                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
4821                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
4822                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
4823                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
4824                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
4825                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
4826         }
4827
4828         if (rdev->num_crtc >= 6) {
4829                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
4830                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4831                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
4832                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4833                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
4834                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
4835                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
4836                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
4837                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
4838                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
4839                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
4840                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
4841         }
4842
4843         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
4844                 tmp = RREG32(DC_HPD1_INT_CONTROL);
4845                 tmp |= DC_HPDx_INT_ACK;
4846                 WREG32(DC_HPD1_INT_CONTROL, tmp);
4847         }
4848         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
4849                 tmp = RREG32(DC_HPD2_INT_CONTROL);
4850                 tmp |= DC_HPDx_INT_ACK;
4851                 WREG32(DC_HPD2_INT_CONTROL, tmp);
4852         }
4853         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4854                 tmp = RREG32(DC_HPD3_INT_CONTROL);
4855                 tmp |= DC_HPDx_INT_ACK;
4856                 WREG32(DC_HPD3_INT_CONTROL, tmp);
4857         }
4858         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4859                 tmp = RREG32(DC_HPD4_INT_CONTROL);
4860                 tmp |= DC_HPDx_INT_ACK;
4861                 WREG32(DC_HPD4_INT_CONTROL, tmp);
4862         }
4863         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4864                 tmp = RREG32(DC_HPD5_INT_CONTROL);
4865                 tmp |= DC_HPDx_INT_ACK;
4866                 WREG32(DC_HPD5_INT_CONTROL, tmp);
4867         }
4868         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4869                 tmp = RREG32(DC_HPD5_INT_CONTROL);
4870                 tmp |= DC_HPDx_INT_ACK;
4871                 WREG32(DC_HPD6_INT_CONTROL, tmp);
4872         }
4873 }
4874
/**
 * si_irq_disable - disable interrupt generation and clear pending state
 * @rdev: radeon_device pointer
 *
 * Masks interrupt delivery, acknowledges anything already latched, then
 * programs the quiescent interrupt register state.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
        si_disable_interrupts(rdev);
        /* Wait and acknowledge irq */
        mdelay(1);
        si_irq_ack(rdev);
        si_disable_interrupt_state(rdev);
}
4883
/**
 * si_irq_suspend - quiesce interrupts for suspend
 * @rdev: radeon_device pointer
 *
 * Disables interrupts and stops the RLC so no interrupt sources remain
 * active while the device is suspended.
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
        si_irq_disable(rdev);
        si_rlc_stop(rdev);
}
4889
/**
 * si_irq_fini - tear down interrupt handling
 * @rdev: radeon_device pointer
 *
 * Suspends interrupt delivery and frees the IH ring buffer.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
        si_irq_suspend(rdev);
        r600_ih_ring_fini(rdev);
}
4895
4896 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
4897 {
4898         u32 wptr, tmp;
4899
4900         if (rdev->wb.enabled)
4901                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
4902         else
4903                 wptr = RREG32(IH_RB_WPTR);
4904
4905         if (wptr & RB_OVERFLOW) {
4906                 /* When a ring buffer overflow happen start parsing interrupt
4907                  * from the last not overwritten vector (wptr + 16). Hopefully
4908                  * this should allow us to catchup.
4909                  */
4910                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
4911                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
4912                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
4913                 tmp = RREG32(IH_RB_CNTL);
4914                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
4915                 WREG32(IH_RB_CNTL, tmp);
4916         }
4917         return (wptr & rdev->ih.ptr_mask);
4918 }
4919
4920 /*        SI IV Ring
4921  * Each IV ring entry is 128 bits:
4922  * [7:0]    - interrupt source id
4923  * [31:8]   - reserved
4924  * [59:32]  - interrupt source data
4925  * [63:60]  - reserved
4926  * [71:64]  - RINGID
4927  * [79:72]  - VMID
4928  * [127:80] - reserved
4929  */
/**
 * si_irq_process - dispatch pending interrupt vectors from the IH ring (SI)
 * @rdev: radeon_device pointer
 *
 * Walks the IH ring from rptr to wptr, decoding each 16-byte IV entry
 * (see the ring-format comment above) and dispatching on source id:
 * crtc vblank/vline (1-6), hotplug (42), VM faults (146/147), CP and
 * DMA completions.  Uses an atomic lock so only one caller processes
 * the ring at a time, and re-reads wptr at the end to catch entries
 * that arrived during processing.
 *
 * Returns IRQ_HANDLED if work was processed, IRQ_NONE otherwise.
 */
int si_irq_process(struct radeon_device *rdev)
{
        u32 wptr;
        u32 rptr;
        u32 src_id, src_data, ring_id;
        u32 ring_index;
        bool queue_hotplug = false;

        if (!rdev->ih.enabled || rdev->shutdown)
                return IRQ_NONE;

        wptr = si_get_ih_wptr(rdev);

restart_ih:
        /* is somebody else already processing irqs? */
        if (atomic_xchg(&rdev->ih.lock, 1))
                return IRQ_NONE;

        rptr = rdev->ih.rptr;
        DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

        /* Order reading of wptr vs. reading of IH ring data */
        rmb();

        /* display interrupts */
        si_irq_ack(rdev);

        while (rptr != wptr) {
                /* wptr/rptr are in bytes! */
                ring_index = rptr / 4;
                src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
                src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
                ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

                switch (src_id) {
                case 1: /* D1 vblank/vline */
                        switch (src_data) {
                        case 0: /* D1 vblank */
                                if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
                                        if (rdev->irq.crtc_vblank_int[0]) {
                                                drm_handle_vblank(rdev->ddev, 0);
                                                rdev->pm.vblank_sync = true;
                                                wake_up(&rdev->irq.vblank_queue);
                                        }
                                        if (atomic_read(&rdev->irq.pflip[0]))
                                                radeon_crtc_handle_flip(rdev, 0);
                                        /* clear the cached status bit so the event is handled once */
                                        rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
                                        DRM_DEBUG("IH: D1 vblank\n");
                                }
                                break;
                        case 1: /* D1 vline */
                                if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
                                        rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
                                        DRM_DEBUG("IH: D1 vline\n");
                                }
                                break;
                        default:
                                DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
                                break;
                        }
                        break;
                case 2: /* D2 vblank/vline */
                        switch (src_data) {
                        case 0: /* D2 vblank */
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
                                        if (rdev->irq.crtc_vblank_int[1]) {
                                                drm_handle_vblank(rdev->ddev, 1);
                                                rdev->pm.vblank_sync = true;
                                                wake_up(&rdev->irq.vblank_queue);
                                        }
                                        if (atomic_read(&rdev->irq.pflip[1]))
                                                radeon_crtc_handle_flip(rdev, 1);
                                        rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
                                        DRM_DEBUG("IH: D2 vblank\n");
                                }
                                break;
                        case 1: /* D2 vline */
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
                                        rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
                                        DRM_DEBUG("IH: D2 vline\n");
                                }
                                break;
                        default:
                                DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
                                break;
                        }
                        break;
                case 3: /* D3 vblank/vline */
                        switch (src_data) {
                        case 0: /* D3 vblank */
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
                                        if (rdev->irq.crtc_vblank_int[2]) {
                                                drm_handle_vblank(rdev->ddev, 2);
                                                rdev->pm.vblank_sync = true;
                                                wake_up(&rdev->irq.vblank_queue);
                                        }
                                        if (atomic_read(&rdev->irq.pflip[2]))
                                                radeon_crtc_handle_flip(rdev, 2);
                                        rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
                                        DRM_DEBUG("IH: D3 vblank\n");
                                }
                                break;
                        case 1: /* D3 vline */
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
                                        rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
                                        DRM_DEBUG("IH: D3 vline\n");
                                }
                                break;
                        default:
                                DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
                                break;
                        }
                        break;
                case 4: /* D4 vblank/vline */
                        switch (src_data) {
                        case 0: /* D4 vblank */
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
                                        if (rdev->irq.crtc_vblank_int[3]) {
                                                drm_handle_vblank(rdev->ddev, 3);
                                                rdev->pm.vblank_sync = true;
                                                wake_up(&rdev->irq.vblank_queue);
                                        }
                                        if (atomic_read(&rdev->irq.pflip[3]))
                                                radeon_crtc_handle_flip(rdev, 3);
                                        rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
                                        DRM_DEBUG("IH: D4 vblank\n");
                                }
                                break;
                        case 1: /* D4 vline */
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
                                        rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
                                        DRM_DEBUG("IH: D4 vline\n");
                                }
                                break;
                        default:
                                DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
                                break;
                        }
                        break;
                case 5: /* D5 vblank/vline */
                        switch (src_data) {
                        case 0: /* D5 vblank */
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
                                        if (rdev->irq.crtc_vblank_int[4]) {
                                                drm_handle_vblank(rdev->ddev, 4);
                                                rdev->pm.vblank_sync = true;
                                                wake_up(&rdev->irq.vblank_queue);
                                        }
                                        if (atomic_read(&rdev->irq.pflip[4]))
                                                radeon_crtc_handle_flip(rdev, 4);
                                        rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
                                        DRM_DEBUG("IH: D5 vblank\n");
                                }
                                break;
                        case 1: /* D5 vline */
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
                                        rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
                                        DRM_DEBUG("IH: D5 vline\n");
                                }
                                break;
                        default:
                                DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
                                break;
                        }
                        break;
                case 6: /* D6 vblank/vline */
                        switch (src_data) {
                        case 0: /* D6 vblank */
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
                                        if (rdev->irq.crtc_vblank_int[5]) {
                                                drm_handle_vblank(rdev->ddev, 5);
                                                rdev->pm.vblank_sync = true;
                                                wake_up(&rdev->irq.vblank_queue);
                                        }
                                        if (atomic_read(&rdev->irq.pflip[5]))
                                                radeon_crtc_handle_flip(rdev, 5);
                                        rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
                                        DRM_DEBUG("IH: D6 vblank\n");
                                }
                                break;
                        case 1: /* D6 vline */
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
                                        rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
                                        DRM_DEBUG("IH: D6 vline\n");
                                }
                                break;
                        default:
                                DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
                                break;
                        }
                        break;
                case 42: /* HPD hotplug */
                        switch (src_data) {
                        case 0:
                                if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
                                        rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
                                        queue_hotplug = true;
                                        DRM_DEBUG("IH: HPD1\n");
                                }
                                break;
                        case 1:
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
                                        rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
                                        queue_hotplug = true;
                                        DRM_DEBUG("IH: HPD2\n");
                                }
                                break;
                        case 2:
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
                                        rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
                                        queue_hotplug = true;
                                        DRM_DEBUG("IH: HPD3\n");
                                }
                                break;
                        case 3:
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
                                        rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
                                        queue_hotplug = true;
                                        DRM_DEBUG("IH: HPD4\n");
                                }
                                break;
                        case 4:
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
                                        rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
                                        queue_hotplug = true;
                                        DRM_DEBUG("IH: HPD5\n");
                                }
                                break;
                        case 5:
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
                                        rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
                                        queue_hotplug = true;
                                        DRM_DEBUG("IH: HPD6\n");
                                }
                                break;
                        default:
                                DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
                                break;
                        }
                        break;
                case 146:
                case 147:
                        /* VM protection faults: dump fault address/status, then clear */
                        dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
                        dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
                                RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
                        dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
                                RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
                        /* reset addr and status */
                        WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
                        break;
                case 176: /* RINGID0 CP_INT */
                        radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
                        break;
                case 177: /* RINGID1 CP_INT */
                        radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
                        break;
                case 178: /* RINGID2 CP_INT */
                        radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
                        break;
                case 181: /* CP EOP event */
                        DRM_DEBUG("IH: CP EOP\n");
                        switch (ring_id) {
                        case 0:
                                radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
                                break;
                        case 1:
                                radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
                                break;
                        case 2:
                                radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
                                break;
                        }
                        break;
                case 224: /* DMA trap event */
                        DRM_DEBUG("IH: DMA trap\n");
                        radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
                        break;
                case 233: /* GUI IDLE */
                        DRM_DEBUG("IH: GUI idle\n");
                        break;
                case 244: /* DMA trap event */
                        DRM_DEBUG("IH: DMA1 trap\n");
                        radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
                        break;
                default:
                        DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
                        break;
                }

                /* wptr/rptr are in bytes! */
                rptr += 16;
                rptr &= rdev->ih.ptr_mask;
        }
        if (queue_hotplug)
                schedule_work(&rdev->hotplug_work);
        rdev->ih.rptr = rptr;
        WREG32(IH_RB_RPTR, rdev->ih.rptr);
        atomic_set(&rdev->ih.lock, 0);

        /* make sure wptr hasn't changed while processing */
        wptr = si_get_ih_wptr(rdev);
        if (wptr != rptr)
                goto restart_ih;

        return IRQ_HANDLED;
}
5236
5237 /**
5238  * si_copy_dma - copy pages using the DMA engine
5239  *
5240  * @rdev: radeon_device pointer
5241  * @src_offset: src GPU address
5242  * @dst_offset: dst GPU address
5243  * @num_gpu_pages: number of GPU pages to xfer
5244  * @fence: radeon fence object
5245  *
5246  * Copy GPU paging using the DMA engine (SI).
5247  * Used by the radeon ttm implementation to move pages if
5248  * registered as the asic copy callback.
5249  */
5250 int si_copy_dma(struct radeon_device *rdev,
5251                 uint64_t src_offset, uint64_t dst_offset,
5252                 unsigned num_gpu_pages,
5253                 struct radeon_fence **fence)
5254 {
5255         struct radeon_semaphore *sem = NULL;
5256         int ring_index = rdev->asic->copy.dma_ring_index;
5257         struct radeon_ring *ring = &rdev->ring[ring_index];
5258         u32 size_in_bytes, cur_size_in_bytes;
5259         int i, num_loops;
5260         int r = 0;
5261
5262         r = radeon_semaphore_create(rdev, &sem);
5263         if (r) {
5264                 DRM_ERROR("radeon: moving bo (%d).\n", r);
5265                 return r;
5266         }
5267
5268         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
5269         num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
5270         r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
5271         if (r) {
5272                 DRM_ERROR("radeon: moving bo (%d).\n", r);
5273                 radeon_semaphore_free(rdev, &sem, NULL);
5274                 return r;
5275         }
5276
5277         if (radeon_fence_need_sync(*fence, ring->idx)) {
5278                 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
5279                                             ring->idx);
5280                 radeon_fence_note_sync(*fence, ring->idx);
5281         } else {
5282                 radeon_semaphore_free(rdev, &sem, NULL);
5283         }
5284
5285         for (i = 0; i < num_loops; i++) {
5286                 cur_size_in_bytes = size_in_bytes;
5287                 if (cur_size_in_bytes > 0xFFFFF)
5288                         cur_size_in_bytes = 0xFFFFF;
5289                 size_in_bytes -= cur_size_in_bytes;
5290                 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
5291                 radeon_ring_write(ring, dst_offset & 0xffffffff);
5292                 radeon_ring_write(ring, src_offset & 0xffffffff);
5293                 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
5294                 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
5295                 src_offset += cur_size_in_bytes;
5296                 dst_offset += cur_size_in_bytes;
5297         }
5298
5299         r = radeon_fence_emit(rdev, fence, ring->idx);
5300         if (r) {
5301                 radeon_ring_unlock_undo(rdev, ring);
5302                 return r;
5303         }
5304
5305         radeon_ring_unlock_commit(rdev, ring);
5306         radeon_semaphore_free(rdev, &sem, *fence);
5307
5308         return r;
5309 }
5310
5311 /*
5312  * startup/shutdown callbacks
5313  */
5314 static int si_startup(struct radeon_device *rdev)
5315 {
5316         struct radeon_ring *ring;
5317         int r;
5318
5319         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5320             !rdev->rlc_fw || !rdev->mc_fw) {
5321                 r = si_init_microcode(rdev);
5322                 if (r) {
5323                         DRM_ERROR("Failed to load firmware!\n");
5324                         return r;
5325                 }
5326         }
5327
5328         r = si_mc_load_microcode(rdev);
5329         if (r) {
5330                 DRM_ERROR("Failed to load MC firmware!\n");
5331                 return r;
5332         }
5333
5334         r = r600_vram_scratch_init(rdev);
5335         if (r)
5336                 return r;
5337
5338         si_mc_program(rdev);
5339         r = si_pcie_gart_enable(rdev);
5340         if (r)
5341                 return r;
5342         si_gpu_init(rdev);
5343
5344         /* allocate rlc buffers */
5345         r = si_rlc_init(rdev);
5346         if (r) {
5347                 DRM_ERROR("Failed to init rlc BOs!\n");
5348                 return r;
5349         }
5350
5351         /* allocate wb buffer */
5352         r = radeon_wb_init(rdev);
5353         if (r)
5354                 return r;
5355
5356         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
5357         if (r) {
5358                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5359                 return r;
5360         }
5361
5362         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5363         if (r) {
5364                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5365                 return r;
5366         }
5367
5368         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
5369         if (r) {
5370                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5371                 return r;
5372         }
5373
5374         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
5375         if (r) {
5376                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
5377                 return r;
5378         }
5379
5380         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5381         if (r) {
5382                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
5383                 return r;
5384         }
5385
5386         if (rdev->has_uvd) {
5387                 r = rv770_uvd_resume(rdev);
5388                 if (!r) {
5389                         r = radeon_fence_driver_start_ring(rdev,
5390                                                            R600_RING_TYPE_UVD_INDEX);
5391                         if (r)
5392                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
5393                 }
5394                 if (r)
5395                         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
5396         }
5397
5398         /* Enable IRQ */
5399         if (!rdev->irq.installed) {
5400                 r = radeon_irq_kms_init(rdev);
5401                 if (r)
5402                         return r;
5403         }
5404
5405         r = si_irq_init(rdev);
5406         if (r) {
5407                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
5408                 radeon_irq_kms_fini(rdev);
5409                 return r;
5410         }
5411         si_irq_set(rdev);
5412
5413         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5414         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
5415                              CP_RB0_RPTR, CP_RB0_WPTR,
5416                              0, 0xfffff, RADEON_CP_PACKET2);
5417         if (r)
5418                 return r;
5419
5420         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5421         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
5422                              CP_RB1_RPTR, CP_RB1_WPTR,
5423                              0, 0xfffff, RADEON_CP_PACKET2);
5424         if (r)
5425                 return r;
5426
5427         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5428         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
5429                              CP_RB2_RPTR, CP_RB2_WPTR,
5430                              0, 0xfffff, RADEON_CP_PACKET2);
5431         if (r)
5432                 return r;
5433
5434         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
5435         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
5436                              DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
5437                              DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
5438                              2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
5439         if (r)
5440                 return r;
5441
5442         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
5443         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
5444                              DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
5445                              DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
5446                              2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
5447         if (r)
5448                 return r;
5449
5450         r = si_cp_load_microcode(rdev);
5451         if (r)
5452                 return r;
5453         r = si_cp_resume(rdev);
5454         if (r)
5455                 return r;
5456
5457         r = cayman_dma_resume(rdev);
5458         if (r)
5459                 return r;
5460
5461         if (rdev->has_uvd) {
5462                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
5463                 if (ring->ring_size) {
5464                         r = radeon_ring_init(rdev, ring, ring->ring_size,
5465                                              R600_WB_UVD_RPTR_OFFSET,
5466                                              UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
5467                                              0, 0xfffff, RADEON_CP_PACKET2);
5468                         if (!r)
5469                                 r = r600_uvd_init(rdev);
5470                         if (r)
5471                                 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
5472                 }
5473         }
5474
5475         r = radeon_ib_pool_init(rdev);
5476         if (r) {
5477                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
5478                 return r;
5479         }
5480
5481         r = radeon_vm_manager_init(rdev);
5482         if (r) {
5483                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
5484                 return r;
5485         }
5486
5487         return 0;
5488 }
5489
5490 int si_resume(struct radeon_device *rdev)
5491 {
5492         int r;
5493
5494         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
5495          * posting will perform necessary task to bring back GPU into good
5496          * shape.
5497          */
5498         /* post card */
5499         atom_asic_init(rdev->mode_info.atom_context);
5500
5501         /* init golden registers */
5502         si_init_golden_registers(rdev);
5503
5504         rdev->accel_working = true;
5505         r = si_startup(rdev);
5506         if (r) {
5507                 DRM_ERROR("si startup failed on resume\n");
5508                 rdev->accel_working = false;
5509                 return r;
5510         }
5511
5512         return r;
5513
5514 }
5515
/**
 * si_suspend - suspend-time quiesce for SI parts
 *
 * @rdev: radeon_device pointer
 *
 * Stops the command processors, DMA engines and (if present) UVD,
 * then disables interrupts, writeback and the GART so the chip is
 * idle before power-down.  The order of these calls matters; do not
 * rearrange them.
 * Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	/* stop software users of the rings before halting the engines */
	radeon_vm_manager_fini(rdev);
	/* halt the gfx/compute command processor */
	si_cp_enable(rdev, false);
	/* halt both async DMA engines */
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		/* stop the UVD ring buffer controller, then suspend UVD */
		r600_uvd_rbc_stop(rdev);
		radeon_uvd_suspend(rdev);
	}
	si_irq_suspend(rdev);
	/* disable writeback before tearing down the GART mapping */
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
5530
/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does little more
 * than call the ASIC-specific functions.  This should also allow
 * removing a bunch of callback functions like vram_info.
 */
/**
 * si_init - driver-load initialization for SI parts
 *
 * @rdev: radeon_device pointer
 *
 * Reads and posts the BIOS, initializes clocks, fences, the memory
 * controller and all rings, then runs the full startup sequence.
 * On startup failure acceleration is disabled but init still returns 0
 * unless the MC ucode is missing.
 * Returns 0 on success, negative error code on failure.
 */
int si_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	si_init_golden_registers(rdev);
	/* Initialize scratch registers */
	si_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = si_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* set up ring sizes; buffers are allocated later in si_startup() */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	/* UVD ring is only set up if UVD init itself succeeded */
	if (rdev->has_uvd) {
		r = radeon_uvd_init(rdev);
		if (!r) {
			ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
			ring->ring_obj = NULL;
			r600_ring_init(rdev, ring, 4096);
		}
	}

	/* interrupt handler ring */
	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = si_startup(rdev);
	if (r) {
		/* unwind in reverse of the startup sequence; keep the
		 * device usable for modesetting with acceleration off
		 */
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		si_cp_fini(rdev);
		cayman_dma_fini(rdev);
		si_irq_fini(rdev);
		si_rlc_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		si_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 * NOTE(review): checked only after si_startup(); "NI+" in the
	 * message presumably means NI-and-newer ASICs -- confirm.
	 */
	if (!rdev->mc_fw) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
5651
/**
 * si_fini - driver-unload teardown for SI parts
 *
 * @rdev: radeon_device pointer
 *
 * Tears down everything set up by si_init()/si_startup(): rings,
 * interrupts, memory management and the BIOS copy.  The order of
 * these calls matters; do not rearrange them.
 */
void si_fini(struct radeon_device *rdev)
{
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_irq_fini(rdev);
	si_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd)
		radeon_uvd_fini(rdev);
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* BIOS copy was kmalloc'ed by radeon_get_bios(); release it */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
5673
5674 /**
5675  * si_get_gpu_clock_counter - return GPU clock counter snapshot
5676  *
5677  * @rdev: radeon_device pointer
5678  *
5679  * Fetches a GPU clock counter snapshot (SI).
5680  * Returns the 64 bit clock counter snapshot.
5681  */
5682 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
5683 {
5684         uint64_t clock;
5685
5686         mutex_lock(&rdev->gpu_clock_mutex);
5687         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5688         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
5689                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5690         mutex_unlock(&rdev->gpu_clock_mutex);
5691         return clock;
5692 }
5693
/**
 * si_set_uvd_clocks - program the UPLL for the requested UVD clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock (0 leaves the PLL bypassed and asleep)
 * @dclk: requested UVD decode clock (0 leaves the PLL bypassed and asleep)
 *
 * Switches VCLK/DCLK to the bypass clock, reprograms the UPLL dividers
 * for the requested frequencies, waits for the PLL to settle and then
 * switches the clock selection back to the PLL outputs.  The register
 * write/delay sequence is order sensitive; do not rearrange it.
 * NOTE(review): clock arguments are presumably in 10 kHz units, matching
 * the 125000/250000 VCO bounds below -- confirm against the callers.
 * Returns 0 on success, negative error code on failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* pick feedback and post dividers for the requested clocks; see
	 * radeon_uvd_calc_upll_dividers() for the limit parameter meanings
	 */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	/* handshake with the SMC before touching the dividers */
	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* spare bit selects a PLL operating range by fb_div threshold */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}